aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-12 23:46:24 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-12 23:46:24 -0400
commit: 06e727d2a5d9d889fabad35223ad77205a9bebb9 (patch)
tree: 2a2d4ec9ed95c95f044c8d69e87ab47195a1d2ed /arch/x86
parent: e68ff9cd15552e46e0f993eace25af0947b1222d (diff)
parent: 3ae36655b97a03fa1decf72f04078ef945647c1a (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip:
  x86-64: Rework vsyscall emulation and add vsyscall= parameter
  x86-64: Wire up getcpu syscall
  x86: Remove unnecessary compile flag tweaks for vsyscall code
  x86-64: Add vsyscall:emulate_vsyscall trace event
  x86-64: Add user_64bit_mode paravirt op
  x86-64, xen: Enable the vvar mapping
  x86-64: Work around gold bug 13023
  x86-64: Move the "user" vsyscall segment out of the data segment.
  x86-64: Pad vDSO to a page boundary
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/desc.h 4
-rw-r--r-- arch/x86/include/asm/irq_vectors.h 4
-rw-r--r-- arch/x86/include/asm/paravirt_types.h 6
-rw-r--r-- arch/x86/include/asm/ptrace.h 19
-rw-r--r-- arch/x86/include/asm/traps.h 2
-rw-r--r-- arch/x86/include/asm/unistd_64.h 2
-rw-r--r-- arch/x86/include/asm/vsyscall.h 6
-rw-r--r-- arch/x86/kernel/Makefile 13
-rw-r--r-- arch/x86/kernel/entry_64.S 1
-rw-r--r-- arch/x86/kernel/paravirt.c 4
-rw-r--r-- arch/x86/kernel/step.c 2
-rw-r--r-- arch/x86/kernel/traps.c 6
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S 41
-rw-r--r-- arch/x86/kernel/vsyscall_64.c 90
-rw-r--r-- arch/x86/kernel/vsyscall_emu_64.S 36
-rw-r--r-- arch/x86/kernel/vsyscall_trace.h 29
-rw-r--r-- arch/x86/mm/fault.c 14
-rw-r--r-- arch/x86/vdso/vdso.S 1
-rw-r--r-- arch/x86/xen/enlighten.c 4
-rw-r--r-- arch/x86/xen/mmu.c 4
20 files changed, 173 insertions, 115 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 7b439d9aea2a..41935fadfdfc 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
27 27
28 desc->base2 = (info->base_addr & 0xff000000) >> 24; 28 desc->base2 = (info->base_addr & 0xff000000) >> 24;
29 /* 29 /*
30 * Don't allow setting of the lm bit. It is useless anyway 30 * Don't allow setting of the lm bit. It would confuse
31 * because 64bit system calls require __USER_CS: 31 * user_64bit_mode and would get overridden by sysret anyway.
32 */ 32 */
33 desc->l = 0; 33 desc->l = 0;
34} 34}
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f9a320984a10..7e50f06393aa 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,6 @@
17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events 17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
18 * Vectors 32 ... 127 : device interrupts 18 * Vectors 32 ... 127 : device interrupts
19 * Vector 128 : legacy int80 syscall interface 19 * Vector 128 : legacy int80 syscall interface
20 * Vector 204 : legacy x86_64 vsyscall emulation
21 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts 20 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
22 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts 21 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
23 * 22 *
@@ -51,9 +50,6 @@
51#ifdef CONFIG_X86_32 50#ifdef CONFIG_X86_32
52# define SYSCALL_VECTOR 0x80 51# define SYSCALL_VECTOR 0x80
53#endif 52#endif
54#ifdef CONFIG_X86_64
55# define VSYSCALL_EMU_VECTOR 0xcc
56#endif
57 53
58/* 54/*
59 * Vectors 0x30-0x3f are used for ISA interrupts. 55 * Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2c7652163111..8e8b9a4987ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
41 41
42#include <asm/desc_defs.h> 42#include <asm/desc_defs.h>
43#include <asm/kmap_types.h> 43#include <asm/kmap_types.h>
44#include <asm/pgtable_types.h>
44 45
45struct page; 46struct page;
46struct thread_struct; 47struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
63struct pv_info { 64struct pv_info {
64 unsigned int kernel_rpl; 65 unsigned int kernel_rpl;
65 int shared_kernel_pmd; 66 int shared_kernel_pmd;
67
68#ifdef CONFIG_X86_64
69 u16 extra_user_64bit_cs; /* __USER_CS if none */
70#endif
71
66 int paravirt_enabled; 72 int paravirt_enabled;
67 const char *name; 73 const char *name;
68}; 74};
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 94e7618fcac8..35664547125b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
131#ifdef __KERNEL__ 131#ifdef __KERNEL__
132 132
133#include <linux/init.h> 133#include <linux/init.h>
134#ifdef CONFIG_PARAVIRT
135#include <asm/paravirt_types.h>
136#endif
134 137
135struct cpuinfo_x86; 138struct cpuinfo_x86;
136struct task_struct; 139struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
187#endif 190#endif
188} 191}
189 192
193#ifdef CONFIG_X86_64
194static inline bool user_64bit_mode(struct pt_regs *regs)
195{
196#ifndef CONFIG_PARAVIRT
197 /*
198 * On non-paravirt systems, this is the only long mode CPL 3
199 * selector. We do not allow long mode selectors in the LDT.
200 */
201 return regs->cs == __USER_CS;
202#else
203 /* Headers are too twisted for this to go in paravirt.h. */
204 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
205#endif
206}
207#endif
208
190/* 209/*
191 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode 210 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
192 * when it traps. The previous stack will be directly underneath the saved 211 * when it traps. The previous stack will be directly underneath the saved
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 2bae0a513b40..0012d0902c5f 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
40asmlinkage void machine_check(void); 40asmlinkage void machine_check(void);
41#endif /* CONFIG_X86_MCE */ 41#endif /* CONFIG_X86_MCE */
42asmlinkage void simd_coprocessor_error(void); 42asmlinkage void simd_coprocessor_error(void);
43asmlinkage void emulate_vsyscall(void);
44 43
45dotraplinkage void do_divide_error(struct pt_regs *, long); 44dotraplinkage void do_divide_error(struct pt_regs *, long);
46dotraplinkage void do_debug(struct pt_regs *, long); 45dotraplinkage void do_debug(struct pt_regs *, long);
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
67dotraplinkage void do_machine_check(struct pt_regs *, long); 66dotraplinkage void do_machine_check(struct pt_regs *, long);
68#endif 67#endif
69dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); 68dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
70dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
71#ifdef CONFIG_X86_32 69#ifdef CONFIG_X86_32
72dotraplinkage void do_iret_error(struct pt_regs *, long); 70dotraplinkage void do_iret_error(struct pt_regs *, long);
73#endif 71#endif
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 705bf139288c..d92641cc7acc 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
681__SYSCALL(__NR_sendmmsg, sys_sendmmsg) 681__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
682#define __NR_setns 308 682#define __NR_setns 308
683__SYSCALL(__NR_setns, sys_setns) 683__SYSCALL(__NR_setns, sys_setns)
684#define __NR_getcpu 309
685__SYSCALL(__NR_getcpu, sys_getcpu)
684 686
685#ifndef __NO_STUBS 687#ifndef __NO_STUBS
686#define __ARCH_WANT_OLD_READDIR 688#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 60107072c28b..eaea1d31f753 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
27 27
28extern void map_vsyscall(void); 28extern void map_vsyscall(void);
29 29
30/*
31 * Called on instruction fetch fault in vsyscall page.
32 * Returns true if handled.
33 */
34extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
35
30#endif /* __KERNEL__ */ 36#endif /* __KERNEL__ */
31 37
32#endif /* _ASM_X86_VSYSCALL_H */ 38#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..82f2912155a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
17CFLAGS_REMOVE_early_printk.o = -pg 17CFLAGS_REMOVE_early_printk.o = -pg
18endif 18endif
19 19
20#
21# vsyscalls (which work on the user stack) should have
22# no stack-protector checks:
23#
24nostackp := $(call cc-option, -fno-stack-protector)
25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
26CFLAGS_hpet.o := $(nostackp)
27CFLAGS_paravirt.o := $(nostackp)
28GCOV_PROFILE_vsyscall_64.o := n
29GCOV_PROFILE_hpet.o := n
30GCOV_PROFILE_tsc.o := n
31GCOV_PROFILE_paravirt.o := n
32
33obj-y := process_$(BITS).o signal.o entry_$(BITS).o 20obj-y := process_$(BITS).o signal.o entry_$(BITS).o
34obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 21obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
35obj-y += time.o ioport.o ldt.o dumpstack.o 22obj-y += time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d800c8..6419bb05ecd5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1111zeroentry coprocessor_error do_coprocessor_error 1111zeroentry coprocessor_error do_coprocessor_error
1112errorentry alignment_check do_alignment_check 1112errorentry alignment_check do_alignment_check
1113zeroentry simd_coprocessor_error do_simd_coprocessor_error 1113zeroentry simd_coprocessor_error do_simd_coprocessor_error
1114zeroentry emulate_vsyscall do_emulate_vsyscall
1115 1114
1116 1115
1117 /* Reload gs selector with exception handling */ 1116 /* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 613a7931ecc1..d90272e6bc40 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
307 .paravirt_enabled = 0, 307 .paravirt_enabled = 0,
308 .kernel_rpl = 0, 308 .kernel_rpl = 0,
309 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 309 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
310
311#ifdef CONFIG_X86_64
312 .extra_user_64bit_cs = __USER_CS,
313#endif
310}; 314};
311 315
312struct pv_init_ops pv_init_ops = { 316struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
74 74
75#ifdef CONFIG_X86_64 75#ifdef CONFIG_X86_64
76 case 0x40 ... 0x4f: 76 case 0x40 ... 0x4f:
77 if (regs->cs != __USER_CS) 77 if (!user_64bit_mode(regs))
78 /* 32-bit mode: register increment */ 78 /* 32-bit mode: register increment */
79 return 0; 79 return 0;
80 /* 64-bit mode: REX prefix */ 80 /* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9682ec50180c..6913369c234c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
872 set_bit(SYSCALL_VECTOR, used_vectors); 872 set_bit(SYSCALL_VECTOR, used_vectors);
873#endif 873#endif
874 874
875#ifdef CONFIG_X86_64
876 BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
877 set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
878 set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
879#endif
880
881 /* 875 /*
882 * Should be a barrier for any external CPU state: 876 * Should be a barrier for any external CPU state:
883 */ 877 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54a9b76..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(6); /* RW_ */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
76 percpu PT_LOAD FLAGS(6); /* RW_ */ 75 percpu PT_LOAD FLAGS(6); /* RW_ */
77#endif 76#endif
@@ -154,44 +153,16 @@ SECTIONS
154 153
155#ifdef CONFIG_X86_64 154#ifdef CONFIG_X86_64
156 155
157#define VSYSCALL_ADDR (-10*1024*1024) 156 . = ALIGN(PAGE_SIZE);
158
159#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
160#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
161
162#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
163#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
164
165 . = ALIGN(4096);
166 __vsyscall_0 = .;
167
168 . = VSYSCALL_ADDR;
169 .vsyscall : AT(VLOAD(.vsyscall)) {
170 *(.vsyscall_0)
171
172 . = 1024;
173 *(.vsyscall_1)
174
175 . = 2048;
176 *(.vsyscall_2)
177
178 . = 4096; /* Pad the whole page. */
179 } :user =0xcc
180 . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
181
182#undef VSYSCALL_ADDR
183#undef VLOAD_OFFSET
184#undef VLOAD
185#undef VVIRT_OFFSET
186#undef VVIRT
187
188 __vvar_page = .; 157 __vvar_page = .;
189 158
190 .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { 159 .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
160 /* work around gold bug 13023 */
161 __vvar_beginning_hack = .;
191 162
192 /* Place all vvars at the offsets in asm/vvar.h. */ 163 /* Place all vvars at the offsets in asm/vvar.h. */
193#define EMIT_VVAR(name, offset) \ 164#define EMIT_VVAR(name, offset) \
194 . = offset; \ 165 . = __vvar_beginning_hack + offset; \
195 *(.vvar_ ## name) 166 *(.vvar_ ## name)
196#define __VVAR_KERNEL_LDS 167#define __VVAR_KERNEL_LDS
197#include <asm/vvar.h> 168#include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..18ae83dd1cd7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
18 * use the vDSO. 18 * use the vDSO.
19 */ 19 */
20 20
21/* Disable profiling for userspace code: */
22#define DISABLE_BRANCH_PROFILING
23
24#include <linux/time.h> 21#include <linux/time.h>
25#include <linux/init.h> 22#include <linux/init.h>
26#include <linux/kernel.h> 23#include <linux/kernel.h>
@@ -50,12 +47,36 @@
50#include <asm/vgtod.h> 47#include <asm/vgtod.h>
51#include <asm/traps.h> 48#include <asm/traps.h>
52 49
50#define CREATE_TRACE_POINTS
51#include "vsyscall_trace.h"
52
53DEFINE_VVAR(int, vgetcpu_mode); 53DEFINE_VVAR(int, vgetcpu_mode);
54DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = 54DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
55{ 55{
56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
57}; 57};
58 58
59static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
60
61static int __init vsyscall_setup(char *str)
62{
63 if (str) {
64 if (!strcmp("emulate", str))
65 vsyscall_mode = EMULATE;
66 else if (!strcmp("native", str))
67 vsyscall_mode = NATIVE;
68 else if (!strcmp("none", str))
69 vsyscall_mode = NONE;
70 else
71 return -EINVAL;
72
73 return 0;
74 }
75
76 return -EINVAL;
77}
78early_param("vsyscall", vsyscall_setup);
79
59void update_vsyscall_tz(void) 80void update_vsyscall_tz(void)
60{ 81{
61 unsigned long flags; 82 unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
100 121
101 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", 122 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
102 level, tsk->comm, task_pid_nr(tsk), 123 level, tsk->comm, task_pid_nr(tsk),
103 message, regs->ip - 2, regs->cs, 124 message, regs->ip, regs->cs,
104 regs->sp, regs->ax, regs->si, regs->di); 125 regs->sp, regs->ax, regs->si, regs->di);
105} 126}
106 127
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
118 return nr; 139 return nr;
119} 140}
120 141
121void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) 142bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
122{ 143{
123 struct task_struct *tsk; 144 struct task_struct *tsk;
124 unsigned long caller; 145 unsigned long caller;
125 int vsyscall_nr; 146 int vsyscall_nr;
126 long ret; 147 long ret;
127 148
128 local_irq_enable();
129
130 /* 149 /*
131 * Real 64-bit user mode code has cs == __USER_CS. Anything else 150 * No point in checking CS -- the only way to get here is a user mode
132 * is bogus. 151 * trap to a high address, which means that we're in 64-bit user code.
133 */ 152 */
134 if (regs->cs != __USER_CS) {
135 /*
136 * If we trapped from kernel mode, we might as well OOPS now
137 * instead of returning to some random address and OOPSing
138 * then.
139 */
140 BUG_ON(!user_mode(regs));
141 153
142 /* Compat mode and non-compat 32-bit CS should both segfault. */ 154 WARN_ON_ONCE(address != regs->ip);
143 warn_bad_vsyscall(KERN_WARNING, regs, 155
144 "illegal int 0xcc from 32-bit mode"); 156 if (vsyscall_mode == NONE) {
145 goto sigsegv; 157 warn_bad_vsyscall(KERN_INFO, regs,
158 "vsyscall attempted with vsyscall=none");
159 return false;
146 } 160 }
147 161
148 /* 162 vsyscall_nr = addr_to_vsyscall_nr(address);
149 * x86-ism here: regs->ip points to the instruction after the int 0xcc, 163
150 * and int 0xcc is two bytes long. 164 trace_emulate_vsyscall(vsyscall_nr);
151 */ 165
152 vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
153 if (vsyscall_nr < 0) { 166 if (vsyscall_nr < 0) {
154 warn_bad_vsyscall(KERN_WARNING, regs, 167 warn_bad_vsyscall(KERN_WARNING, regs,
155 "illegal int 0xcc (exploit attempt?)"); 168 "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
156 goto sigsegv; 169 goto sigsegv;
157 } 170 }
158 171
159 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { 172 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
160 warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); 173 warn_bad_vsyscall(KERN_WARNING, regs,
174 "vsyscall with bad stack (exploit attempt?)");
161 goto sigsegv; 175 goto sigsegv;
162 } 176 }
163 177
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
202 regs->ip = caller; 216 regs->ip = caller;
203 regs->sp += 8; 217 regs->sp += 8;
204 218
205 local_irq_disable(); 219 return true;
206 return;
207 220
208sigsegv: 221sigsegv:
209 regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
210 force_sig(SIGSEGV, current); 222 force_sig(SIGSEGV, current);
211 local_irq_disable(); 223 return true;
212} 224}
213 225
214/* 226/*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
256 268
257void __init map_vsyscall(void) 269void __init map_vsyscall(void)
258{ 270{
259 extern char __vsyscall_0; 271 extern char __vsyscall_page;
260 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 272 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
261 extern char __vvar_page; 273 extern char __vvar_page;
262 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); 274 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
263 275
264 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ 276 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
265 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 277 vsyscall_mode == NATIVE
278 ? PAGE_KERNEL_VSYSCALL
279 : PAGE_KERNEL_VVAR);
280 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
281 (unsigned long)VSYSCALL_START);
282
266 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); 283 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
267 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); 284 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
285 (unsigned long)VVAR_ADDRESS);
268} 286}
269 287
270static int __init vsyscall_init(void) 288static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10
10#include <asm/irq_vectors.h> 11#include <asm/irq_vectors.h>
12#include <asm/page_types.h>
13#include <asm/unistd_64.h>
14
15__PAGE_ALIGNED_DATA
16 .globl __vsyscall_page
17 .balign PAGE_SIZE, 0xcc
18 .type __vsyscall_page, @object
19__vsyscall_page:
20
21 mov $__NR_gettimeofday, %rax
22 syscall
23 ret
11 24
12/* The unused parts of the page are filled with 0xcc by the linker script. */ 25 .balign 1024, 0xcc
26 mov $__NR_time, %rax
27 syscall
28 ret
13 29
14.section .vsyscall_0, "a" 30 .balign 1024, 0xcc
15ENTRY(vsyscall_0) 31 mov $__NR_getcpu, %rax
16 int $VSYSCALL_EMU_VECTOR 32 syscall
17END(vsyscall_0) 33 ret
18 34
19.section .vsyscall_1, "a" 35 .balign 4096, 0xcc
20ENTRY(vsyscall_1)
21 int $VSYSCALL_EMU_VECTOR
22END(vsyscall_1)
23 36
24.section .vsyscall_2, "a" 37 .size __vsyscall_page, 4096
25ENTRY(vsyscall_2)
26 int $VSYSCALL_EMU_VECTOR
27END(vsyscall_2)
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 000000000000..a8b2edec54fe
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM vsyscall
3
4#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
5#define __VSYSCALL_TRACE_H
6
7#include <linux/tracepoint.h>
8
9TRACE_EVENT(emulate_vsyscall,
10
11 TP_PROTO(int nr),
12
13 TP_ARGS(nr),
14
15 TP_STRUCT__entry(__field(int, nr)),
16
17 TP_fast_assign(
18 __entry->nr = nr;
19 ),
20
21 TP_printk("nr = %d", __entry->nr)
22);
23
24#endif
25
26#undef TRACE_INCLUDE_PATH
27#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
28#define TRACE_INCLUDE_FILE vsyscall_trace
29#include <trace/define_trace.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4d09df054e39..247aae3dc008 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
105 * but for now it's good enough to assume that long 105 * but for now it's good enough to assume that long
106 * mode only uses well known segments or kernel. 106 * mode only uses well known segments or kernel.
107 */ 107 */
108 return (!user_mode(regs)) || (regs->cs == __USER_CS); 108 return (!user_mode(regs) || user_64bit_mode(regs));
109#endif 109#endif
110 case 0x60: 110 case 0x60:
111 /* 0x64 thru 0x67 are valid prefixes in all modes. */ 111 /* 0x64 thru 0x67 are valid prefixes in all modes. */
@@ -720,6 +720,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
720 if (is_errata100(regs, address)) 720 if (is_errata100(regs, address))
721 return; 721 return;
722 722
723#ifdef CONFIG_X86_64
724 /*
725 * Instruction fetch faults in the vsyscall page might need
726 * emulation.
727 */
728 if (unlikely((error_code & PF_INSTR) &&
729 ((address & ~0xfff) == VSYSCALL_START))) {
730 if (emulate_vsyscall(regs, address))
731 return;
732 }
733#endif
734
723 if (unlikely(show_unhandled_signals)) 735 if (unlikely(show_unhandled_signals))
724 show_signal_msg(regs, error_code, address, tsk); 736 show_signal_msg(regs, error_code, address, tsk);
725 737
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1b979c12ba85..01f5e3b4613c 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
9vdso_start: 9vdso_start:
10 .incbin "arch/x86/vdso/vdso.so" 10 .incbin "arch/x86/vdso/vdso.so"
11vdso_end: 11vdso_end:
12 .align PAGE_SIZE /* extra data here leaks to userspace. */
12 13
13.previous 14.previous
14 15
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528458a0..e2345af01af0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
951 .paravirt_enabled = 1, 951 .paravirt_enabled = 1,
952 .shared_kernel_pmd = 0, 952 .shared_kernel_pmd = 0,
953 953
954#ifdef CONFIG_X86_64
955 .extra_user_64bit_cs = FLAT_USER_CS64,
956#endif
957
954 .name = "Xen", 958 .name = "Xen",
955}; 959};
956 960
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f987bde77c49..8cce339db5e7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1916# endif 1916# endif
1917#else 1917#else
1918 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: 1918 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1919 case VVAR_PAGE:
1919#endif 1920#endif
1920 case FIX_TEXT_POKE0: 1921 case FIX_TEXT_POKE0:
1921 case FIX_TEXT_POKE1: 1922 case FIX_TEXT_POKE1:
@@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1956#ifdef CONFIG_X86_64 1957#ifdef CONFIG_X86_64
1957 /* Replicate changes to map the vsyscall page into the user 1958 /* Replicate changes to map the vsyscall page into the user
1958 pagetable vsyscall mapping. */ 1959 pagetable vsyscall mapping. */
1959 if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { 1960 if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
1961 idx == VVAR_PAGE) {
1960 unsigned long vaddr = __fix_to_virt(idx); 1962 unsigned long vaddr = __fix_to_virt(idx);
1961 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); 1963 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
1962 } 1964 }