aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2006-06-27 05:53:50 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-27 20:32:38 -0400
commite6e5494cb23d1933735ee47cc674ffe1c4afed6f (patch)
treec8945bb3ae5bec38693d801fb589d22d48d6f8eb /arch/i386/kernel
parentd5fb34261dcd32c9cb3b28121fdc46308db513a1 (diff)
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it. Besides the security implications, this feature also helps debuggers, which can COW a vma-backed VDSO just like a normal DSO and can thus do single-stepping and other debugging features. It's good for hypervisors (Xen, VMWare) too, which typically live in the same high-mapped address space as the VDSO, hence whenever the VDSO is used, they get lots of guest pagefaults and have to fix such guest accesses up - which slows things down instead of speeding things up (the primary purpose of the VDSO). There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support for older glibcs that still rely on a prelinked high-mapped VDSO. Newer distributions (using glibc 2.3.3 or later) can turn this option off. Turning it off is also recommended for security reasons: attackers cannot use the predictable high-mapped VDSO page as syscall trampoline anymore. There is a new vdso=[0|1] boot option as well, and a runtime /proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned on/off. (This version of the VDSO-randomization patch also has working ELF coredumping, the previous patch crashed in the coredumping code.) This code is a combined work of the exec-shield VDSO randomization code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell started this patch and i completed it. [akpm@osdl.org: cleanups] [akpm@osdl.org: compile fix] [akpm@osdl.org: compile fix 2] [akpm@osdl.org: compile fix 3] [akpm@osdl.org: revernt MAXMEM change] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@infradead.org> Cc: Gerd Hoffmann <kraxel@suse.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Zachary Amsden <zach@vmware.com> Cc: Andi Kleen <ak@muc.de> Cc: Jan Beulich <jbeulich@novell.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/asm-offsets.c4
-rw-r--r--arch/i386/kernel/entry.S7
-rw-r--r--arch/i386/kernel/signal.c4
-rw-r--r--arch/i386/kernel/sysenter.c128
-rw-r--r--arch/i386/kernel/vsyscall-sysenter.S4
-rw-r--r--arch/i386/kernel/vsyscall.lds.S4
6 files changed, 139 insertions, 12 deletions
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1c3a809e6421..c80271f8f084 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -14,6 +14,7 @@
14#include <asm/fixmap.h> 14#include <asm/fixmap.h>
15#include <asm/processor.h> 15#include <asm/processor.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/elf.h>
17 18
18#define DEFINE(sym, val) \ 19#define DEFINE(sym, val) \
19 asm volatile("\n->" #sym " %0 " #val : : "i" (val)) 20 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -54,6 +55,7 @@ void foo(void)
54 OFFSET(TI_preempt_count, thread_info, preempt_count); 55 OFFSET(TI_preempt_count, thread_info, preempt_count);
55 OFFSET(TI_addr_limit, thread_info, addr_limit); 56 OFFSET(TI_addr_limit, thread_info, addr_limit);
56 OFFSET(TI_restart_block, thread_info, restart_block); 57 OFFSET(TI_restart_block, thread_info, restart_block);
58 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
57 BLANK(); 59 BLANK();
58 60
59 OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); 61 OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
@@ -69,7 +71,7 @@ void foo(void)
69 sizeof(struct tss_struct)); 71 sizeof(struct tss_struct));
70 72
71 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 73 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
72 DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); 74 DEFINE(VDSO_PRELINK, VDSO_PRELINK);
73 75
74 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); 76 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
75} 77}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index e8d2630fd19a..fbdb933251b6 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -270,7 +270,12 @@ sysenter_past_esp:
270 pushl $(__USER_CS) 270 pushl $(__USER_CS)
271 CFI_ADJUST_CFA_OFFSET 4 271 CFI_ADJUST_CFA_OFFSET 4
272 /*CFI_REL_OFFSET cs, 0*/ 272 /*CFI_REL_OFFSET cs, 0*/
273 pushl $SYSENTER_RETURN 273 /*
274 * Push current_thread_info()->sysenter_return to the stack.
275 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
276 * pushed above; +8 corresponds to copy_thread's esp0 setting.
277 */
278 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
274 CFI_ADJUST_CFA_OFFSET 4 279 CFI_ADJUST_CFA_OFFSET 4
275 CFI_REL_OFFSET eip, 0 280 CFI_REL_OFFSET eip, 0
276 281
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 5c352c3a9e7f..43002cfb40c4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
351 goto give_sigsegv; 351 goto give_sigsegv;
352 } 352 }
353 353
354 restorer = &__kernel_sigreturn; 354 restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
355 if (ka->sa.sa_flags & SA_RESTORER) 355 if (ka->sa.sa_flags & SA_RESTORER)
356 restorer = ka->sa.sa_restorer; 356 restorer = ka->sa.sa_restorer;
357 357
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
447 goto give_sigsegv; 447 goto give_sigsegv;
448 448
449 /* Set up to return from userspace. */ 449 /* Set up to return from userspace. */
450 restorer = &__kernel_rt_sigreturn; 450 restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn);
451 if (ka->sa.sa_flags & SA_RESTORER) 451 if (ka->sa.sa_flags & SA_RESTORER)
452 restorer = ka->sa.sa_restorer; 452 restorer = ka->sa.sa_restorer;
453 err |= __put_user(restorer, &frame->pretcode); 453 err |= __put_user(restorer, &frame->pretcode);
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 0bada1870bdf..c60419dee018 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -2,6 +2,8 @@
2 * linux/arch/i386/kernel/sysenter.c 2 * linux/arch/i386/kernel/sysenter.c
3 * 3 *
4 * (C) Copyright 2002 Linus Torvalds 4 * (C) Copyright 2002 Linus Torvalds
5 * Portions based on the vdso-randomization code from exec-shield:
6 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
5 * 7 *
6 * This file contains the needed initializations to support sysenter. 8 * This file contains the needed initializations to support sysenter.
7 */ 9 */
@@ -13,12 +15,31 @@
13#include <linux/gfp.h> 15#include <linux/gfp.h>
14#include <linux/string.h> 16#include <linux/string.h>
15#include <linux/elf.h> 17#include <linux/elf.h>
18#include <linux/mm.h>
19#include <linux/module.h>
16 20
17#include <asm/cpufeature.h> 21#include <asm/cpufeature.h>
18#include <asm/msr.h> 22#include <asm/msr.h>
19#include <asm/pgtable.h> 23#include <asm/pgtable.h>
20#include <asm/unistd.h> 24#include <asm/unistd.h>
21 25
26/*
27 * Should the kernel map a VDSO page into processes and pass its
28 * address down to glibc upon exec()?
29 */
30unsigned int __read_mostly vdso_enabled = 1;
31
32EXPORT_SYMBOL_GPL(vdso_enabled);
33
34static int __init vdso_setup(char *s)
35{
36 vdso_enabled = simple_strtoul(s, NULL, 0);
37
38 return 1;
39}
40
41__setup("vdso=", vdso_setup);
42
22extern asmlinkage void sysenter_entry(void); 43extern asmlinkage void sysenter_entry(void);
23 44
24void enable_sep_cpu(void) 45void enable_sep_cpu(void)
@@ -45,23 +66,122 @@ void enable_sep_cpu(void)
45 */ 66 */
46extern const char vsyscall_int80_start, vsyscall_int80_end; 67extern const char vsyscall_int80_start, vsyscall_int80_end;
47extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 68extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
69static void *syscall_page;
48 70
49int __init sysenter_setup(void) 71int __init sysenter_setup(void)
50{ 72{
51 void *page = (void *)get_zeroed_page(GFP_ATOMIC); 73 syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
52 74
53 __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); 75#ifdef CONFIG_COMPAT_VDSO
76 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
77 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
78#else
79 /*
80 * In the non-compat case the ELF coredumping code needs the fixmap:
81 */
82 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
83#endif
54 84
55 if (!boot_cpu_has(X86_FEATURE_SEP)) { 85 if (!boot_cpu_has(X86_FEATURE_SEP)) {
56 memcpy(page, 86 memcpy(syscall_page,
57 &vsyscall_int80_start, 87 &vsyscall_int80_start,
58 &vsyscall_int80_end - &vsyscall_int80_start); 88 &vsyscall_int80_end - &vsyscall_int80_start);
59 return 0; 89 return 0;
60 } 90 }
61 91
62 memcpy(page, 92 memcpy(syscall_page,
63 &vsyscall_sysenter_start, 93 &vsyscall_sysenter_start,
64 &vsyscall_sysenter_end - &vsyscall_sysenter_start); 94 &vsyscall_sysenter_end - &vsyscall_sysenter_start);
65 95
66 return 0; 96 return 0;
67} 97}
98
99static struct page *syscall_nopage(struct vm_area_struct *vma,
100 unsigned long adr, int *type)
101{
102 struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
103 get_page(p);
104 return p;
105}
106
107/* Prevent VMA merging */
108static void syscall_vma_close(struct vm_area_struct *vma)
109{
110}
111
112static struct vm_operations_struct syscall_vm_ops = {
113 .close = syscall_vma_close,
114 .nopage = syscall_nopage,
115};
116
117/* Defined in vsyscall-sysenter.S */
118extern void SYSENTER_RETURN;
119
120/* Setup a VMA at program startup for the vsyscall page */
121int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
122{
123 struct vm_area_struct *vma;
124 struct mm_struct *mm = current->mm;
125 unsigned long addr;
126 int ret;
127
128 down_write(&mm->mmap_sem);
129 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
130 if (IS_ERR_VALUE(addr)) {
131 ret = addr;
132 goto up_fail;
133 }
134
135 vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
136 if (!vma) {
137 ret = -ENOMEM;
138 goto up_fail;
139 }
140
141 vma->vm_start = addr;
142 vma->vm_end = addr + PAGE_SIZE;
143 /* MAYWRITE to allow gdb to COW and set breakpoints */
144 vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
145 vma->vm_flags |= mm->def_flags;
146 vma->vm_page_prot = protection_map[vma->vm_flags & 7];
147 vma->vm_ops = &syscall_vm_ops;
148 vma->vm_mm = mm;
149
150 ret = insert_vm_struct(mm, vma);
151 if (ret)
152 goto free_vma;
153
154 current->mm->context.vdso = (void *)addr;
155 current_thread_info()->sysenter_return =
156 (void *)VDSO_SYM(&SYSENTER_RETURN);
157 mm->total_vm++;
158up_fail:
159 up_write(&mm->mmap_sem);
160 return ret;
161
162free_vma:
163 kmem_cache_free(vm_area_cachep, vma);
164 return ret;
165}
166
167const char *arch_vma_name(struct vm_area_struct *vma)
168{
169 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
170 return "[vdso]";
171 return NULL;
172}
173
174struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
175{
176 return NULL;
177}
178
179int in_gate_area(struct task_struct *task, unsigned long addr)
180{
181 return 0;
182}
183
184int in_gate_area_no_task(unsigned long addr)
185{
186 return 0;
187}
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
index 3b62baa6a371..1a36d26e15eb 100644
--- a/arch/i386/kernel/vsyscall-sysenter.S
+++ b/arch/i386/kernel/vsyscall-sysenter.S
@@ -42,10 +42,10 @@ __kernel_vsyscall:
42 /* 7: align return point with nop's to make disassembly easier */ 42 /* 7: align return point with nop's to make disassembly easier */
43 .space 7,0x90 43 .space 7,0x90
44 44
45 /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ 45 /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
46 jmp .Lenter_kernel 46 jmp .Lenter_kernel
47 /* 16: System call normal return point is here! */ 47 /* 16: System call normal return point is here! */
48 .globl SYSENTER_RETURN /* Symbol used by entry.S. */ 48 .globl SYSENTER_RETURN /* Symbol used by sysenter.c */
49SYSENTER_RETURN: 49SYSENTER_RETURN:
50 pop %ebp 50 pop %ebp
51.Lpop_ebp: 51.Lpop_ebp:
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
index 98699ca6e52d..e26975fc68b6 100644
--- a/arch/i386/kernel/vsyscall.lds.S
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -7,7 +7,7 @@
7 7
8SECTIONS 8SECTIONS
9{ 9{
10 . = VSYSCALL_BASE + SIZEOF_HEADERS; 10 . = VDSO_PRELINK + SIZEOF_HEADERS;
11 11
12 .hash : { *(.hash) } :text 12 .hash : { *(.hash) } :text
13 .dynsym : { *(.dynsym) } 13 .dynsym : { *(.dynsym) }
@@ -20,7 +20,7 @@ SECTIONS
20 For the layouts to match, we need to skip more than enough 20 For the layouts to match, we need to skip more than enough
21 space for the dynamic symbol table et al. If this amount 21 space for the dynamic symbol table et al. If this amount
22 is insufficient, ld -shared will barf. Just increase it here. */ 22 is insufficient, ld -shared will barf. Just increase it here. */
23 . = VSYSCALL_BASE + 0x400; 23 . = VDSO_PRELINK + 0x400;
24 24
25 .text : { *(.text) } :text =0x90909090 25 .text : { *(.text) } :text =0x90909090
26 .note : { *(.note.*) } :text :note 26 .note : { *(.note.*) } :text :note