aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/sysenter.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2006-06-27 05:53:50 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-27 20:32:38 -0400
commite6e5494cb23d1933735ee47cc674ffe1c4afed6f (patch)
treec8945bb3ae5bec38693d801fb589d22d48d6f8eb /arch/i386/kernel/sysenter.c
parentd5fb34261dcd32c9cb3b28121fdc46308db513a1 (diff)
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it. Besides the security implications, this feature also helps debuggers, which can COW a vma-backed VDSO just like a normal DSO and can thus do single-stepping and other debugging features. It's good for hypervisors (Xen, VMWare) too, which typically live in the same high-mapped address space as the VDSO, hence whenever the VDSO is used, they get lots of guest pagefaults and have to fix such guest accesses up - which slows things down instead of speeding things up (the primary purpose of the VDSO). There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support for older glibcs that still rely on a prelinked high-mapped VDSO. Newer distributions (using glibc 2.3.3 or later) can turn this option off. Turning it off is also recommended for security reasons: attackers cannot use the predictable high-mapped VDSO page as syscall trampoline anymore. There is a new vdso=[0|1] boot option as well, and a runtime /proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned on/off. (This version of the VDSO-randomization patch also has working ELF coredumping, the previous patch crashed in the coredumping code.) This code is a combined work of the exec-shield VDSO randomization code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell started this patch and i completed it. [akpm@osdl.org: cleanups] [akpm@osdl.org: compile fix] [akpm@osdl.org: compile fix 2] [akpm@osdl.org: compile fix 3] [akpm@osdl.org: revernt MAXMEM change] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@infradead.org> Cc: Gerd Hoffmann <kraxel@suse.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Zachary Amsden <zach@vmware.com> Cc: Andi Kleen <ak@muc.de> Cc: Jan Beulich <jbeulich@novell.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel/sysenter.c')
-rw-r--r--arch/i386/kernel/sysenter.c128
1 files changed, 124 insertions, 4 deletions
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 0bada1870bdf..c60419dee018 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -2,6 +2,8 @@
2 * linux/arch/i386/kernel/sysenter.c 2 * linux/arch/i386/kernel/sysenter.c
3 * 3 *
4 * (C) Copyright 2002 Linus Torvalds 4 * (C) Copyright 2002 Linus Torvalds
5 * Portions based on the vdso-randomization code from exec-shield:
6 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
5 * 7 *
6 * This file contains the needed initializations to support sysenter. 8 * This file contains the needed initializations to support sysenter.
7 */ 9 */
@@ -13,12 +15,31 @@
13#include <linux/gfp.h> 15#include <linux/gfp.h>
14#include <linux/string.h> 16#include <linux/string.h>
15#include <linux/elf.h> 17#include <linux/elf.h>
18#include <linux/mm.h>
19#include <linux/module.h>
16 20
17#include <asm/cpufeature.h> 21#include <asm/cpufeature.h>
18#include <asm/msr.h> 22#include <asm/msr.h>
19#include <asm/pgtable.h> 23#include <asm/pgtable.h>
20#include <asm/unistd.h> 24#include <asm/unistd.h>
21 25
26/*
27 * Should the kernel map a VDSO page into processes and pass its
28 * address down to glibc upon exec()?
29 */
30unsigned int __read_mostly vdso_enabled = 1;
31
32EXPORT_SYMBOL_GPL(vdso_enabled);
33
34static int __init vdso_setup(char *s)
35{
36 vdso_enabled = simple_strtoul(s, NULL, 0);
37
38 return 1;
39}
40
41__setup("vdso=", vdso_setup);
42
22extern asmlinkage void sysenter_entry(void); 43extern asmlinkage void sysenter_entry(void);
23 44
24void enable_sep_cpu(void) 45void enable_sep_cpu(void)
@@ -45,23 +66,122 @@ void enable_sep_cpu(void)
45 */ 66 */
46extern const char vsyscall_int80_start, vsyscall_int80_end; 67extern const char vsyscall_int80_start, vsyscall_int80_end;
47extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 68extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
69static void *syscall_page;
48 70
49int __init sysenter_setup(void) 71int __init sysenter_setup(void)
50{ 72{
51 void *page = (void *)get_zeroed_page(GFP_ATOMIC); 73 syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
52 74
53 __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); 75#ifdef CONFIG_COMPAT_VDSO
76 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
77 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
78#else
79 /*
80 * In the non-compat case the ELF coredumping code needs the fixmap:
81 */
82 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
83#endif
54 84
55 if (!boot_cpu_has(X86_FEATURE_SEP)) { 85 if (!boot_cpu_has(X86_FEATURE_SEP)) {
56 memcpy(page, 86 memcpy(syscall_page,
57 &vsyscall_int80_start, 87 &vsyscall_int80_start,
58 &vsyscall_int80_end - &vsyscall_int80_start); 88 &vsyscall_int80_end - &vsyscall_int80_start);
59 return 0; 89 return 0;
60 } 90 }
61 91
62 memcpy(page, 92 memcpy(syscall_page,
63 &vsyscall_sysenter_start, 93 &vsyscall_sysenter_start,
64 &vsyscall_sysenter_end - &vsyscall_sysenter_start); 94 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
65 95
66 return 0; 96 return 0;
67} 97}
98
99static struct page *syscall_nopage(struct vm_area_struct *vma,
100 unsigned long adr, int *type)
101{
102 struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
103 get_page(p);
104 return p;
105}
106
107/* Prevent VMA merging */
108static void syscall_vma_close(struct vm_area_struct *vma)
109{
110}
111
112static struct vm_operations_struct syscall_vm_ops = {
113 .close = syscall_vma_close,
114 .nopage = syscall_nopage,
115};
116
117/* Defined in vsyscall-sysenter.S */
118extern void SYSENTER_RETURN;
119
120/* Setup a VMA at program startup for the vsyscall page */
121int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
122{
123 struct vm_area_struct *vma;
124 struct mm_struct *mm = current->mm;
125 unsigned long addr;
126 int ret;
127
128 down_write(&mm->mmap_sem);
129 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
130 if (IS_ERR_VALUE(addr)) {
131 ret = addr;
132 goto up_fail;
133 }
134
135 vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
136 if (!vma) {
137 ret = -ENOMEM;
138 goto up_fail;
139 }
140
141 vma->vm_start = addr;
142 vma->vm_end = addr + PAGE_SIZE;
143 /* MAYWRITE to allow gdb to COW and set breakpoints */
144 vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
145 vma->vm_flags |= mm->def_flags;
146 vma->vm_page_prot = protection_map[vma->vm_flags & 7];
147 vma->vm_ops = &syscall_vm_ops;
148 vma->vm_mm = mm;
149
150 ret = insert_vm_struct(mm, vma);
151 if (ret)
152 goto free_vma;
153
154 current->mm->context.vdso = (void *)addr;
155 current_thread_info()->sysenter_return =
156 (void *)VDSO_SYM(&SYSENTER_RETURN);
157 mm->total_vm++;
158up_fail:
159 up_write(&mm->mmap_sem);
160 return ret;
161
162free_vma:
163 kmem_cache_free(vm_area_cachep, vma);
164 return ret;
165}
166
167const char *arch_vma_name(struct vm_area_struct *vma)
168{
169 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
170 return "[vdso]";
171 return NULL;
172}
173
174struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
175{
176 return NULL;
177}
178
179int in_gate_area(struct task_struct *task, unsigned long addr)
180{
181 return 0;
182}
183
184int in_gate_area_no_task(unsigned long addr)
185{
186 return 0;
187}