aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@mit.edu>2011-08-10 11:15:32 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2011-08-10 20:26:46 -0400
commit3ae36655b97a03fa1decf72f04078ef945647c1a (patch)
treef6f5fbc743909d66e6823c9325ddba9d9d3646ad /arch/x86/kernel
parentfce8dc06423d6fb2709469dc5c55b04e09c1d126 (diff)
x86-64: Rework vsyscall emulation and add vsyscall= parameter
There are three choices: vsyscall=native: Vsyscalls are native code that issues the corresponding syscalls. vsyscall=emulate (default): Vsyscalls are emulated by instruction fault traps, tested in the bad_area path. The actual contents of the vsyscall page is the same as the vsyscall=native case except that it's marked NX. This way programs that make assumptions about what the code in the page does will not be confused when they read that code. vsyscall=none: Trying to execute a vsyscall will segfault. Signed-off-by: Andy Lutomirski <luto@mit.edu> Link: http://lkml.kernel.org/r/8449fb3abf89851fd6b2260972666a6f82542284.1312988155.git.luto@mit.edu Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/entry_64.S1
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/vmlinux.lds.S33
-rw-r--r--arch/x86/kernel/vsyscall_64.c79
-rw-r--r--arch/x86/kernel/vsyscall_emu_64.S36
5 files changed, 72 insertions, 83 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e949793d6b93..46792d900018 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1123,7 +1123,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1123zeroentry coprocessor_error do_coprocessor_error 1123zeroentry coprocessor_error do_coprocessor_error
1124errorentry alignment_check do_alignment_check 1124errorentry alignment_check do_alignment_check
1125zeroentry simd_coprocessor_error do_simd_coprocessor_error 1125zeroentry simd_coprocessor_error do_simd_coprocessor_error
1126zeroentry emulate_vsyscall do_emulate_vsyscall
1127 1126
1128 1127
1129 /* Reload gs selector with exception handling */ 1128 /* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fbc097a085ca..b9b67166f9de 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
872 set_bit(SYSCALL_VECTOR, used_vectors); 872 set_bit(SYSCALL_VECTOR, used_vectors);
873#endif 873#endif
874 874
875#ifdef CONFIG_X86_64
876 BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
877 set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
878 set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
879#endif
880
881 /* 875 /*
882 * Should be a barrier for any external CPU state: 876 * Should be a barrier for any external CPU state:
883 */ 877 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8f3a265476d7..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(6); /* RW_ */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
76 percpu PT_LOAD FLAGS(6); /* RW_ */ 75 percpu PT_LOAD FLAGS(6); /* RW_ */
77#endif 76#endif
@@ -174,38 +173,6 @@ SECTIONS
174 173
175 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); 174 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
176 175
177#define VSYSCALL_ADDR (-10*1024*1024)
178
179#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
180#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
181
182#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
183#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
184
185 __vsyscall_0 = .;
186
187 . = VSYSCALL_ADDR;
188 .vsyscall : AT(VLOAD(.vsyscall)) {
189 /* work around gold bug 13023 */
190 __vsyscall_beginning_hack = .;
191 *(.vsyscall_0)
192
193 . = __vsyscall_beginning_hack + 1024;
194 *(.vsyscall_1)
195
196 . = __vsyscall_beginning_hack + 2048;
197 *(.vsyscall_2)
198
199 . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */
200 } :user =0xcc
201 . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
202
203#undef VSYSCALL_ADDR
204#undef VLOAD_OFFSET
205#undef VLOAD
206#undef VVIRT_OFFSET
207#undef VVIRT
208
209#endif /* CONFIG_X86_64 */ 176#endif /* CONFIG_X86_64 */
210 177
211 /* Init code and data - will be freed after init */ 178 /* Init code and data - will be freed after init */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index bf8e9ffee6e9..18ae83dd1cd7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -56,6 +56,27 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
57}; 57};
58 58
59static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
60
61static int __init vsyscall_setup(char *str)
62{
63 if (str) {
64 if (!strcmp("emulate", str))
65 vsyscall_mode = EMULATE;
66 else if (!strcmp("native", str))
67 vsyscall_mode = NATIVE;
68 else if (!strcmp("none", str))
69 vsyscall_mode = NONE;
70 else
71 return -EINVAL;
72
73 return 0;
74 }
75
76 return -EINVAL;
77}
78early_param("vsyscall", vsyscall_setup);
79
59void update_vsyscall_tz(void) 80void update_vsyscall_tz(void)
60{ 81{
61 unsigned long flags; 82 unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
100 121
101 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", 122 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
102 level, tsk->comm, task_pid_nr(tsk), 123 level, tsk->comm, task_pid_nr(tsk),
103 message, regs->ip - 2, regs->cs, 124 message, regs->ip, regs->cs,
104 regs->sp, regs->ax, regs->si, regs->di); 125 regs->sp, regs->ax, regs->si, regs->di);
105} 126}
106 127
@@ -118,45 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
118 return nr; 139 return nr;
119} 140}
120 141
121void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) 142bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
122{ 143{
123 struct task_struct *tsk; 144 struct task_struct *tsk;
124 unsigned long caller; 145 unsigned long caller;
125 int vsyscall_nr; 146 int vsyscall_nr;
126 long ret; 147 long ret;
127 148
128 local_irq_enable(); 149 /*
150 * No point in checking CS -- the only way to get here is a user mode
151 * trap to a high address, which means that we're in 64-bit user code.
152 */
129 153
130 if (!user_64bit_mode(regs)) { 154 WARN_ON_ONCE(address != regs->ip);
131 /*
132 * If we trapped from kernel mode, we might as well OOPS now
133 * instead of returning to some random address and OOPSing
134 * then.
135 */
136 BUG_ON(!user_mode(regs));
137 155
138 /* Compat mode and non-compat 32-bit CS should both segfault. */ 156 if (vsyscall_mode == NONE) {
139 warn_bad_vsyscall(KERN_WARNING, regs, 157 warn_bad_vsyscall(KERN_INFO, regs,
140 "illegal int 0xcc from 32-bit mode"); 158 "vsyscall attempted with vsyscall=none");
141 goto sigsegv; 159 return false;
142 } 160 }
143 161
144 /* 162 vsyscall_nr = addr_to_vsyscall_nr(address);
145 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
146 * and int 0xcc is two bytes long.
147 */
148 vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
149 163
150 trace_emulate_vsyscall(vsyscall_nr); 164 trace_emulate_vsyscall(vsyscall_nr);
151 165
152 if (vsyscall_nr < 0) { 166 if (vsyscall_nr < 0) {
153 warn_bad_vsyscall(KERN_WARNING, regs, 167 warn_bad_vsyscall(KERN_WARNING, regs,
154 "illegal int 0xcc (exploit attempt?)"); 168 "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
155 goto sigsegv; 169 goto sigsegv;
156 } 170 }
157 171
158 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { 172 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
159 warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); 173 warn_bad_vsyscall(KERN_WARNING, regs,
174 "vsyscall with bad stack (exploit attempt?)");
160 goto sigsegv; 175 goto sigsegv;
161 } 176 }
162 177
@@ -201,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
201 regs->ip = caller; 216 regs->ip = caller;
202 regs->sp += 8; 217 regs->sp += 8;
203 218
204 local_irq_disable(); 219 return true;
205 return;
206 220
207sigsegv: 221sigsegv:
208 regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
209 force_sig(SIGSEGV, current); 222 force_sig(SIGSEGV, current);
210 local_irq_disable(); 223 return true;
211} 224}
212 225
213/* 226/*
@@ -255,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
255 268
256void __init map_vsyscall(void) 269void __init map_vsyscall(void)
257{ 270{
258 extern char __vsyscall_0; 271 extern char __vsyscall_page;
259 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 272 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
260 extern char __vvar_page; 273 extern char __vvar_page;
261 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); 274 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
262 275
263 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ 276 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
264 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 277 vsyscall_mode == NATIVE
278 ? PAGE_KERNEL_VSYSCALL
279 : PAGE_KERNEL_VVAR);
280 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
281 (unsigned long)VSYSCALL_START);
282
265 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); 283 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
266 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); 284 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
285 (unsigned long)VVAR_ADDRESS);
267} 286}
268 287
269static int __init vsyscall_init(void) 288static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10
10#include <asm/irq_vectors.h> 11#include <asm/irq_vectors.h>
12#include <asm/page_types.h>
13#include <asm/unistd_64.h>
14
15__PAGE_ALIGNED_DATA
16 .globl __vsyscall_page
17 .balign PAGE_SIZE, 0xcc
18 .type __vsyscall_page, @object
19__vsyscall_page:
20
21 mov $__NR_gettimeofday, %rax
22 syscall
23 ret
11 24
12/* The unused parts of the page are filled with 0xcc by the linker script. */ 25 .balign 1024, 0xcc
26 mov $__NR_time, %rax
27 syscall
28 ret
13 29
14.section .vsyscall_0, "a" 30 .balign 1024, 0xcc
15ENTRY(vsyscall_0) 31 mov $__NR_getcpu, %rax
16 int $VSYSCALL_EMU_VECTOR 32 syscall
17END(vsyscall_0) 33 ret
18 34
19.section .vsyscall_1, "a" 35 .balign 4096, 0xcc
20ENTRY(vsyscall_1)
21 int $VSYSCALL_EMU_VECTOR
22END(vsyscall_1)
23 36
24.section .vsyscall_2, "a" 37 .size __vsyscall_page, 4096
25ENTRY(vsyscall_2)
26 int $VSYSCALL_EMU_VECTOR
27END(vsyscall_2)