diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-06-27 05:53:50 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-27 20:32:38 -0400 |
commit | e6e5494cb23d1933735ee47cc674ffe1c4afed6f (patch) | |
tree | c8945bb3ae5bec38693d801fb589d22d48d6f8eb /arch/i386/kernel/sysenter.c | |
parent | d5fb34261dcd32c9cb3b28121fdc46308db513a1 (diff) |
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.
Besides the security implications, this feature also helps debuggers, which
can COW a vma-backed VDSO just like a normal DSO and can thus do
single-stepping and other debugging features.
It's good for hypervisors (Xen, VMWare) too, which typically live in the same
high-mapped address space as the VDSO, hence whenever the VDSO is used, they
get lots of guest pagefaults and have to fix such guest accesses up - which
slows things down instead of speeding things up (the primary purpose of the
VDSO).
There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support
for older glibcs that still rely on a prelinked high-mapped VDSO. Newer
distributions (using glibc 2.3.3 or later) can turn this option off. Turning
it off is also recommended for security reasons: attackers cannot use the
predictable high-mapped VDSO page as syscall trampoline anymore.
There is a new vdso=[0|1] boot option as well, and a runtime
/proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned
on/off.
(This version of the VDSO-randomization patch also has working ELF
coredumping, the previous patch crashed in the coredumping code.)
This code is a combined work of the exec-shield VDSO randomization
code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell
started this patch and i completed it.
[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revernt MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel/sysenter.c')
-rw-r--r-- | arch/i386/kernel/sysenter.c | 128 |
1 files changed, 124 insertions, 4 deletions
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0bada1870bdf..c60419dee018 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * linux/arch/i386/kernel/sysenter.c | 2 | * linux/arch/i386/kernel/sysenter.c |
3 | * | 3 | * |
4 | * (C) Copyright 2002 Linus Torvalds | 4 | * (C) Copyright 2002 Linus Torvalds |
5 | * Portions based on the vdso-randomization code from exec-shield: | ||
6 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | 7 | * |
6 | * This file contains the needed initializations to support sysenter. | 8 | * This file contains the needed initializations to support sysenter. |
7 | */ | 9 | */ |
@@ -13,12 +15,31 @@ | |||
13 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
14 | #include <linux/string.h> | 16 | #include <linux/string.h> |
15 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
18 | #include <linux/mm.h> | ||
19 | #include <linux/module.h> | ||
16 | 20 | ||
17 | #include <asm/cpufeature.h> | 21 | #include <asm/cpufeature.h> |
18 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
19 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
20 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
21 | 25 | ||
26 | /* | ||
27 | * Should the kernel map a VDSO page into processes and pass its | ||
28 | * address down to glibc upon exec()? | ||
29 | */ | ||
30 | unsigned int __read_mostly vdso_enabled = 1; | ||
31 | |||
32 | EXPORT_SYMBOL_GPL(vdso_enabled); | ||
33 | |||
34 | static int __init vdso_setup(char *s) | ||
35 | { | ||
36 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | |||
41 | __setup("vdso=", vdso_setup); | ||
42 | |||
22 | extern asmlinkage void sysenter_entry(void); | 43 | extern asmlinkage void sysenter_entry(void); |
23 | 44 | ||
24 | void enable_sep_cpu(void) | 45 | void enable_sep_cpu(void) |
@@ -45,23 +66,122 @@ void enable_sep_cpu(void) | |||
45 | */ | 66 | */ |
46 | extern const char vsyscall_int80_start, vsyscall_int80_end; | 67 | extern const char vsyscall_int80_start, vsyscall_int80_end; |
47 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 68 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
69 | static void *syscall_page; | ||
48 | 70 | ||
49 | int __init sysenter_setup(void) | 71 | int __init sysenter_setup(void) |
50 | { | 72 | { |
51 | void *page = (void *)get_zeroed_page(GFP_ATOMIC); | 73 | syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
52 | 74 | ||
53 | __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); | 75 | #ifdef CONFIG_COMPAT_VDSO |
76 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | ||
77 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
78 | #else | ||
79 | /* | ||
80 | * In the non-compat case the ELF coredumping code needs the fixmap: | ||
81 | */ | ||
82 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); | ||
83 | #endif | ||
54 | 84 | ||
55 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
56 | memcpy(page, | 86 | memcpy(syscall_page, |
57 | &vsyscall_int80_start, | 87 | &vsyscall_int80_start, |
58 | &vsyscall_int80_end - &vsyscall_int80_start); | 88 | &vsyscall_int80_end - &vsyscall_int80_start); |
59 | return 0; | 89 | return 0; |
60 | } | 90 | } |
61 | 91 | ||
62 | memcpy(page, | 92 | memcpy(syscall_page, |
63 | &vsyscall_sysenter_start, | 93 | &vsyscall_sysenter_start, |
64 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | 94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); |
65 | 95 | ||
66 | return 0; | 96 | return 0; |
67 | } | 97 | } |
98 | |||
99 | static struct page *syscall_nopage(struct vm_area_struct *vma, | ||
100 | unsigned long adr, int *type) | ||
101 | { | ||
102 | struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); | ||
103 | get_page(p); | ||
104 | return p; | ||
105 | } | ||
106 | |||
107 | /* Prevent VMA merging */ | ||
108 | static void syscall_vma_close(struct vm_area_struct *vma) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | static struct vm_operations_struct syscall_vm_ops = { | ||
113 | .close = syscall_vma_close, | ||
114 | .nopage = syscall_nopage, | ||
115 | }; | ||
116 | |||
117 | /* Defined in vsyscall-sysenter.S */ | ||
118 | extern void SYSENTER_RETURN; | ||
119 | |||
120 | /* Setup a VMA at program startup for the vsyscall page */ | ||
121 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
122 | { | ||
123 | struct vm_area_struct *vma; | ||
124 | struct mm_struct *mm = current->mm; | ||
125 | unsigned long addr; | ||
126 | int ret; | ||
127 | |||
128 | down_write(&mm->mmap_sem); | ||
129 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
130 | if (IS_ERR_VALUE(addr)) { | ||
131 | ret = addr; | ||
132 | goto up_fail; | ||
133 | } | ||
134 | |||
135 | vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); | ||
136 | if (!vma) { | ||
137 | ret = -ENOMEM; | ||
138 | goto up_fail; | ||
139 | } | ||
140 | |||
141 | vma->vm_start = addr; | ||
142 | vma->vm_end = addr + PAGE_SIZE; | ||
143 | /* MAYWRITE to allow gdb to COW and set breakpoints */ | ||
144 | vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; | ||
145 | vma->vm_flags |= mm->def_flags; | ||
146 | vma->vm_page_prot = protection_map[vma->vm_flags & 7]; | ||
147 | vma->vm_ops = &syscall_vm_ops; | ||
148 | vma->vm_mm = mm; | ||
149 | |||
150 | ret = insert_vm_struct(mm, vma); | ||
151 | if (ret) | ||
152 | goto free_vma; | ||
153 | |||
154 | current->mm->context.vdso = (void *)addr; | ||
155 | current_thread_info()->sysenter_return = | ||
156 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
157 | mm->total_vm++; | ||
158 | up_fail: | ||
159 | up_write(&mm->mmap_sem); | ||
160 | return ret; | ||
161 | |||
162 | free_vma: | ||
163 | kmem_cache_free(vm_area_cachep, vma); | ||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
168 | { | ||
169 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
170 | return "[vdso]"; | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
174 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | int in_gate_area(struct task_struct *task, unsigned long addr) | ||
180 | { | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | int in_gate_area_no_task(unsigned long addr) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||