 Documentation/kernel-parameters.txt   | 21
 arch/x86/include/asm/desc.h           |  4
 arch/x86/include/asm/irq_vectors.h    |  4
 arch/x86/include/asm/paravirt_types.h |  6
 arch/x86/include/asm/ptrace.h         | 19
 arch/x86/include/asm/traps.h          |  2
 arch/x86/include/asm/unistd_64.h      |  2
 arch/x86/include/asm/vsyscall.h       |  6
 arch/x86/kernel/Makefile              | 13
 arch/x86/kernel/entry_64.S            |  1
 arch/x86/kernel/paravirt.c            |  4
 arch/x86/kernel/step.c                |  2
 arch/x86/kernel/traps.c               |  6
 arch/x86/kernel/vmlinux.lds.S         | 41
 arch/x86/kernel/vsyscall_64.c         | 90
 arch/x86/kernel/vsyscall_emu_64.S     | 36
 arch/x86/kernel/vsyscall_trace.h      | 29
 arch/x86/mm/fault.c                   | 14
 arch/x86/vdso/vdso.S                  |  1
 arch/x86/xen/enlighten.c              |  4
 arch/x86/xen/mmu.c                    |  4
 21 files changed, 194 insertions(+), 115 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e279b7242912..78926aa2531c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2680,6 +2680,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	vmpoff=		[KNL,S390] Perform z/VM CP command after power off.
 			Format: <command>
 
+	vsyscall=	[X86-64]
+			Controls the behavior of vsyscalls (i.e. calls to
+			fixed addresses of 0xffffffffff600x00 from legacy
+			code).  Most statically-linked binaries and older
+			versions of glibc use these calls.  Because these
+			functions are at fixed addresses, they make nice
+			targets for exploits that can control RIP.
+
+			emulate     [default] Vsyscalls turn into traps and are
+			            emulated reasonably safely.
+
+			native      Vsyscalls are native syscall instructions.
+			            This is a little bit faster than trapping
+			            and makes a few dynamic recompilers work
+			            better than they would in emulation mode.
+			            It also makes exploits much easier to write.
+
+			none        Vsyscalls don't work at all.  This makes
+			            them quite hard to use for exploits but
+			            might break your system.
+
 	vt.cur_default=	[VT] Default cursor shape.
 			Format: 0xCCBBAA, where AA, BB, and CC are the same as
 			the parameters of the <Esc>[?A;B;Cc escape sequence;
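
[Editor's illustration, not part of the patch: the legacy interface this parameter governs can still be exercised directly from a 64-bit user program. A minimal sketch, assuming the historical vsyscall ABI with gettimeofday at the fixed address 0xffffffffff600000:]

	/*
	 * Illustrative sketch only -- calls the legacy gettimeofday
	 * vsyscall at its fixed address.  Build as a 64-bit binary;
	 * expect a SIGSEGV when booted with vsyscall=none.
	 */
	#include <stdio.h>
	#include <sys/time.h>

	int main(void)
	{
		/* gettimeofday occupies the first 1024-byte slot of the page. */
		int (*vgtod)(struct timeval *, struct timezone *) =
			(int (*)(struct timeval *, struct timezone *))0xffffffffff600000UL;
		struct timeval tv;

		if (vgtod(&tv, NULL) == 0)
			printf("tv_sec = %ld\n", (long)tv.tv_sec);
		return 0;
	}

[Under emulate the call traps into the kernel and is emulated; under native it executes a real syscall instruction placed in the mapped page.]
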
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 7b439d9aea2a..41935fadfdfc 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
 
 	desc->base2 = (info->base_addr & 0xff000000) >> 24;
 	/*
-	 * Don't allow setting of the lm bit. It is useless anyway
-	 * because 64bit system calls require __USER_CS:
+	 * Don't allow setting of the lm bit. It would confuse
+	 * user_64bit_mode and would get overridden by sysret anyway.
 	 */
 	desc->l = 0;
 }
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f9a320984a10..7e50f06393aa 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,6 @@
  * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
  * Vectors 32 ... 127 : device interrupts
  * Vector 128 : legacy int80 syscall interface
- * Vector 204 : legacy x86_64 vsyscall emulation
  * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
  * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
  *
@@ -51,9 +50,6 @@
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR			0x80
 #endif
-#ifdef CONFIG_X86_64
-# define VSYSCALL_EMU_VECTOR		0xcc
-#endif
 
 /*
  * Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2c7652163111..8e8b9a4987ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
 
 #include <asm/desc_defs.h>
 #include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
 
 struct page;
 struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
 struct pv_info {
 	unsigned int kernel_rpl;
 	int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+	u16 extra_user_64bit_cs;  /* __USER_CS if none */
+#endif
+
 	int paravirt_enabled;
 	const char *name;
 };
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 94e7618fcac8..35664547125b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
 #ifdef __KERNEL__
 
 #include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
 
 struct cpuinfo_x86;
 struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
 #endif
 }
 
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+	/*
+	 * On non-paravirt systems, this is the only long mode CPL 3
+	 * selector.  We do not allow long mode selectors in the LDT.
+	 */
+	return regs->cs == __USER_CS;
+#else
+	/* Headers are too twisted for this to go in paravirt.h. */
+	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
 /*
  * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
  * when it traps.  The previous stack will be directly underneath the saved
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 2bae0a513b40..0012d0902c5f 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
 asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
 asmlinkage void simd_coprocessor_error(void);
-asmlinkage void emulate_vsyscall(void);
 
 dotraplinkage void do_divide_error(struct pt_regs *, long);
 dotraplinkage void do_debug(struct pt_regs *, long);
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
 dotraplinkage void do_machine_check(struct pt_regs *, long);
 #endif
 dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
-dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *, long);
 #endif
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 705bf139288c..d92641cc7acc 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
 __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
 #define __NR_setns				308
 __SYSCALL(__NR_setns, sys_setns)
+#define __NR_getcpu				309
+__SYSCALL(__NR_getcpu, sys_getcpu)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 60107072c28b..eaea1d31f753 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
 
 extern void map_vsyscall(void);
 
+/*
+ * Called on instruction fetch fault in vsyscall page.
+ * Returns true if handled.
+ */
+extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..82f2912155a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 endif
 
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o		:= $(nostackp)
-CFLAGS_paravirt.o	:= $(nostackp)
-GCOV_PROFILE_vsyscall_64.o	:= n
-GCOV_PROFILE_hpet.o		:= n
-GCOV_PROFILE_tsc.o		:= n
-GCOV_PROFILE_paravirt.o		:= n
-
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d800c8..6419bb05ecd5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
 zeroentry coprocessor_error do_coprocessor_error
 errorentry alignment_check do_alignment_check
 zeroentry simd_coprocessor_error do_simd_coprocessor_error
-zeroentry emulate_vsyscall do_emulate_vsyscall
 
 
 	/* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 613a7931ecc1..d90272e6bc40 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
 	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
 	.shared_kernel_pmd = 1,	/* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+	.extra_user_64bit_cs = __USER_CS,
+#endif
 };
 
 struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 
 #ifdef CONFIG_X86_64
 	case 0x40 ... 0x4f:
-		if (regs->cs != __USER_CS)
+		if (!user_64bit_mode(regs))
 			/* 32-bit mode: register increment */
 			return 0;
 		/* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9682ec50180c..6913369c234c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
 
-#ifdef CONFIG_X86_64
-	BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
-	set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
-	set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
-#endif
-
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54a9b76..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(6);          /* RW_ */
 #ifdef CONFIG_X86_64
-	user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
 	percpu PT_LOAD FLAGS(6);        /* RW_ */
 #endif
@@ -154,44 +153,16 @@ SECTIONS
 
 #ifdef CONFIG_X86_64
 
-#define VSYSCALL_ADDR (-10*1024*1024)
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
-	. = ALIGN(4096);
-	__vsyscall_0 = .;
-
-	. = VSYSCALL_ADDR;
-	.vsyscall : AT(VLOAD(.vsyscall)) {
-		*(.vsyscall_0)
-
-		. = 1024;
-		*(.vsyscall_1)
-
-		. = 2048;
-		*(.vsyscall_2)
-
-		. = 4096; /* Pad the whole page. */
-	} :user =0xcc
-	. = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
-
-#undef VSYSCALL_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
+	. = ALIGN(PAGE_SIZE);
 	__vvar_page = .;
 
 	.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+		/* work around gold bug 13023 */
+		__vvar_beginning_hack = .;
 
 		/* Place all vvars at the offsets in asm/vvar.h. */
 #define EMIT_VVAR(name, offset)	\
-		. = offset;		\
+		. = __vvar_beginning_hack + offset;	\
 		*(.vvar_ ## name)
 #define __VVAR_KERNEL_LDS
 #include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..18ae83dd1cd7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
  * use the vDSO.
  */
 
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -50,12 +47,36 @@
 #include <asm/vgtod.h>
 #include <asm/traps.h>
 
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
 DEFINE_VVAR(int, vgetcpu_mode);
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 {
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };
 
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+static int __init vsyscall_setup(char *str)
+{
+	if (str) {
+		if (!strcmp("emulate", str))
+			vsyscall_mode = EMULATE;
+		else if (!strcmp("native", str))
+			vsyscall_mode = NATIVE;
+		else if (!strcmp("none", str))
+			vsyscall_mode = NONE;
+		else
+			return -EINVAL;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+early_param("vsyscall", vsyscall_setup);
+
 void update_vsyscall_tz(void)
 {
 	unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 
 	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
 	       level, tsk->comm, task_pid_nr(tsk),
-	       message, regs->ip - 2, regs->cs,
+	       message, regs->ip, regs->cs,
 	       regs->sp, regs->ax, regs->si, regs->di);
 }
 
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }
 
-void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
 	struct task_struct *tsk;
 	unsigned long caller;
 	int vsyscall_nr;
 	long ret;
 
-	local_irq_enable();
-
 	/*
-	 * Real 64-bit user mode code has cs == __USER_CS.  Anything else
-	 * is bogus.
+	 * No point in checking CS -- the only way to get here is a user mode
+	 * trap to a high address, which means that we're in 64-bit user code.
 	 */
-	if (regs->cs != __USER_CS) {
-		/*
-		 * If we trapped from kernel mode, we might as well OOPS now
-		 * instead of returning to some random address and OOPSing
-		 * then.
-		 */
-		BUG_ON(!user_mode(regs));
 
-		/* Compat mode and non-compat 32-bit CS should both segfault. */
-		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "illegal int 0xcc from 32-bit mode");
-		goto sigsegv;
+	WARN_ON_ONCE(address != regs->ip);
+
+	if (vsyscall_mode == NONE) {
+		warn_bad_vsyscall(KERN_INFO, regs,
+				  "vsyscall attempted with vsyscall=none");
+		return false;
 	}
 
-	/*
-	 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
-	 * and int 0xcc is two bytes long.
-	 */
-	vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+	vsyscall_nr = addr_to_vsyscall_nr(address);
+
+	trace_emulate_vsyscall(vsyscall_nr);
+
 	if (vsyscall_nr < 0) {
 		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "illegal int 0xcc (exploit attempt?)");
+				  "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
 		goto sigsegv;
 	}
 
 	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
-		warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
+		warn_bad_vsyscall(KERN_WARNING, regs,
+				  "vsyscall with bad stack (exploit attempt?)");
 		goto sigsegv;
 	}
 
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
 	regs->ip = caller;
 	regs->sp += 8;
 
-	local_irq_disable();
-	return;
+	return true;
 
 sigsegv:
-	regs->ip -= 2;  /* The faulting instruction should be the int 0xcc. */
 	force_sig(SIGSEGV, current);
-	local_irq_disable();
+	return true;
 }
 
 /*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 
 void __init map_vsyscall(void)
 {
-	extern char __vsyscall_0;
-	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+	extern char __vsyscall_page;
+	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 	extern char __vvar_page;
 	unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
 
-	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
-	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+		     vsyscall_mode == NATIVE
+		     ? PAGE_KERNEL_VSYSCALL
+		     : PAGE_KERNEL_VVAR);
+	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
+		     (unsigned long)VSYSCALL_START);
+
 	__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
-	BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
+	BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
+		     (unsigned long)VVAR_ADDRESS);
 }
 
 static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
  */
 
 #include <linux/linkage.h>
+
 #include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+__PAGE_ALIGNED_DATA
+	.globl __vsyscall_page
+	.balign PAGE_SIZE, 0xcc
+	.type __vsyscall_page, @object
+__vsyscall_page:
+
+	mov $__NR_gettimeofday, %rax
+	syscall
+	ret
 
-/* The unused parts of the page are filled with 0xcc by the linker script. */
+	.balign 1024, 0xcc
+	mov $__NR_time, %rax
+	syscall
+	ret
 
-.section .vsyscall_0, "a"
-ENTRY(vsyscall_0)
-	int $VSYSCALL_EMU_VECTOR
-END(vsyscall_0)
+	.balign 1024, 0xcc
+	mov $__NR_getcpu, %rax
+	syscall
+	ret
 
-.section .vsyscall_1, "a"
-ENTRY(vsyscall_1)
-	int $VSYSCALL_EMU_VECTOR
-END(vsyscall_1)
+	.balign 4096, 0xcc
 
-.section .vsyscall_2, "a"
-ENTRY(vsyscall_2)
-	int $VSYSCALL_EMU_VECTOR
-END(vsyscall_2)
+	.size __vsyscall_page, 4096
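
[Editor's note: the three stubs above sit at fixed 1024-byte offsets inside the page, which is what lets the fault path map a faulting address back to a vsyscall number. The patch's addr_to_vsyscall_nr() helper is referenced but not shown in this diff; the following is a sketch of the mapping it has to perform, with VSYSCALL_START defined locally so the fragment stands alone:]

	#include <errno.h>

	#define VSYSCALL_START 0xffffffffff600000UL /* fixed base of the vsyscall page */

	/* Sketch (not verbatim from the patch): map an address in the
	 * vsyscall page to the index of its 1024-byte slot, or -EINVAL. */
	static int addr_to_vsyscall_nr(unsigned long addr)
	{
		int nr;

		/* Clearing bits 10-11 must leave exactly the page base. */
		if ((addr & ~0xC00UL) != VSYSCALL_START)
			return -EINVAL;

		/* Bits 10-11 select slot 0 (gettimeofday), 1 (time), or 2 (getcpu). */
		nr = (int)((addr & 0xC00UL) >> 10);
		if (nr >= 3)
			return -EINVAL;

		return nr;
	}

[A misaligned address fails the first check, which is exactly the "misaligned vsyscall" case warned about in emulate_vsyscall() above.]
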
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 000000000000..a8b2edec54fe
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+	    TP_PROTO(int nr),
+
+	    TP_ARGS(nr),
+
+	    TP_STRUCT__entry(__field(int, nr)),
+
+	    TP_fast_assign(
+			   __entry->nr = nr;
+			   ),
+
+	    TP_printk("nr = %d", __entry->nr)
+	    );
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
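
[Editor's note, not part of the patch: once this header is compiled in via CREATE_TRACE_POINTS, the event appears under the "vsyscall" system in tracefs. A hypothetical user-space consumer, assuming debugfs is mounted at /sys/kernel/debug:]

	/* Hypothetical consumer: enable the event, then stream trace_pipe. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		static const char enable[] =
			"/sys/kernel/debug/tracing/events/vsyscall/emulate_vsyscall/enable";
		char buf[4096];
		ssize_t n;
		int fd;

		fd = open(enable, O_WRONLY);
		if (fd < 0)
			return 1;
		if (write(fd, "1", 1) != 1) {
			close(fd);
			return 1;
		}
		close(fd);

		fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
		if (fd < 0)
			return 1;
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, (size_t)n, stdout);
		return 0;
	}
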
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4d09df054e39..247aae3dc008 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
 		 * but for now it's good enough to assume that long
 		 * mode only uses well known segments or kernel.
 		 */
-		return (!user_mode(regs)) || (regs->cs == __USER_CS);
+		return (!user_mode(regs) || user_64bit_mode(regs));
 #endif
 	case 0x60:
 		/* 0x64 thru 0x67 are valid prefixes in all modes. */
@@ -720,6 +720,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	if (is_errata100(regs, address))
 		return;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Instruction fetch faults in the vsyscall page might need
+	 * emulation.
+	 */
+	if (unlikely((error_code & PF_INSTR) &&
+		     ((address & ~0xfff) == VSYSCALL_START))) {
+		if (emulate_vsyscall(regs, address))
+			return;
+	}
+#endif
+
 	if (unlikely(show_unhandled_signals))
 		show_signal_msg(regs, error_code, address, tsk);
 
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1b979c12ba85..01f5e3b4613c 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
 vdso_start:
 	.incbin "arch/x86/vdso/vdso.so"
 vdso_end:
+	.align PAGE_SIZE /* extra data here leaks to userspace. */
 
 .previous
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528458a0..e2345af01af0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
 
+#ifdef CONFIG_X86_64
+	.extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
 	.name = "Xen",
 };
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f987bde77c49..8cce339db5e7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 # endif
 #else
 	case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+	case VVAR_PAGE:
 #endif
 	case FIX_TEXT_POKE0:
 	case FIX_TEXT_POKE1:
@@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #ifdef CONFIG_X86_64
 	/* Replicate changes to map the vsyscall page into the user
 	   pagetable vsyscall mapping. */
-	if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+	if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+	    idx == VVAR_PAGE) {
 		unsigned long vaddr = __fix_to_virt(idx);
 		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
 	}