diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-06-27 05:53:50 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-27 20:32:38 -0400 |
commit | e6e5494cb23d1933735ee47cc674ffe1c4afed6f (patch) | |
tree | c8945bb3ae5bec38693d801fb589d22d48d6f8eb /arch/i386 | |
parent | d5fb34261dcd32c9cb3b28121fdc46308db513a1 (diff) |
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.
Besides the security implications, this feature also helps debuggers, which
can COW a vma-backed VDSO just like a normal DSO and can thus do
single-stepping and other debugging features.
It's good for hypervisors (Xen, VMware) too, which typically live in the same
high-mapped address space as the VDSO, hence whenever the VDSO is used, they
get lots of guest pagefaults and have to fix such guest accesses up - which
slows things down instead of speeding things up (the primary purpose of the
VDSO).
There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support
for older glibcs that still rely on a prelinked high-mapped VDSO. Newer
distributions (using glibc 2.3.3 or later) can turn this option off. Turning
it off is also recommended for security reasons: attackers cannot use the
predictable high-mapped VDSO page as a syscall trampoline anymore.
There is a new vdso=[0|1] boot option as well, and a runtime
/proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned
on/off.
(This version of the VDSO-randomization patch also has working ELF
coredumping, the previous patch crashed in the coredumping code.)
This code is a combined work of the exec-shield VDSO randomization
code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell
started this patch and I completed it.
[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revert MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/Kconfig | 11 | ||||
-rw-r--r-- | arch/i386/kernel/asm-offsets.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 7 | ||||
-rw-r--r-- | arch/i386/kernel/signal.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/sysenter.c | 128 | ||||
-rw-r--r-- | arch/i386/kernel/vsyscall-sysenter.S | 4 | ||||
-rw-r--r-- | arch/i386/kernel/vsyscall.lds.S | 4 |
7 files changed, 150 insertions, 12 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6662f8c44798..3bb221db164a 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -780,6 +780,17 @@ config HOTPLUG_CPU | |||
780 | enable suspend on SMP systems. CPUs can be controlled through | 780 | enable suspend on SMP systems. CPUs can be controlled through |
781 | /sys/devices/system/cpu. | 781 | /sys/devices/system/cpu. |
782 | 782 | ||
783 | config COMPAT_VDSO | ||
784 | bool "Compat VDSO support" | ||
785 | default y | ||
786 | help | ||
787 | Map the VDSO to the predictable old-style address too. | ||
788 | ---help--- | ||
789 | Say N here if you are running a sufficiently recent glibc | ||
790 | version (2.3.3 or later), to remove the high-mapped | ||
791 | VDSO mapping and to exclusively use the randomized VDSO. | ||
792 | |||
793 | If unsure, say Y. | ||
783 | 794 | ||
784 | endmenu | 795 | endmenu |
785 | 796 | ||
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1c3a809e6421..c80271f8f084 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/fixmap.h> | 14 | #include <asm/fixmap.h> |
15 | #include <asm/processor.h> | 15 | #include <asm/processor.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/elf.h> | ||
17 | 18 | ||
18 | #define DEFINE(sym, val) \ | 19 | #define DEFINE(sym, val) \ |
19 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 20 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
@@ -54,6 +55,7 @@ void foo(void) | |||
54 | OFFSET(TI_preempt_count, thread_info, preempt_count); | 55 | OFFSET(TI_preempt_count, thread_info, preempt_count); |
55 | OFFSET(TI_addr_limit, thread_info, addr_limit); | 56 | OFFSET(TI_addr_limit, thread_info, addr_limit); |
56 | OFFSET(TI_restart_block, thread_info, restart_block); | 57 | OFFSET(TI_restart_block, thread_info, restart_block); |
58 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); | ||
57 | BLANK(); | 59 | BLANK(); |
58 | 60 | ||
59 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | 61 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); |
@@ -69,7 +71,7 @@ void foo(void) | |||
69 | sizeof(struct tss_struct)); | 71 | sizeof(struct tss_struct)); |
70 | 72 | ||
71 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 73 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
72 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); | 74 | DEFINE(VDSO_PRELINK, VDSO_PRELINK); |
73 | 75 | ||
74 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 76 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
75 | } | 77 | } |
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e8d2630fd19a..fbdb933251b6 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -270,7 +270,12 @@ sysenter_past_esp: | |||
270 | pushl $(__USER_CS) | 270 | pushl $(__USER_CS) |
271 | CFI_ADJUST_CFA_OFFSET 4 | 271 | CFI_ADJUST_CFA_OFFSET 4 |
272 | /*CFI_REL_OFFSET cs, 0*/ | 272 | /*CFI_REL_OFFSET cs, 0*/ |
273 | pushl $SYSENTER_RETURN | 273 | /* |
274 | * Push current_thread_info()->sysenter_return to the stack. | ||
275 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | ||
276 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | ||
277 | */ | ||
278 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | ||
274 | CFI_ADJUST_CFA_OFFSET 4 | 279 | CFI_ADJUST_CFA_OFFSET 4 |
275 | CFI_REL_OFFSET eip, 0 | 280 | CFI_REL_OFFSET eip, 0 |
276 | 281 | ||
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 5c352c3a9e7f..43002cfb40c4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
351 | goto give_sigsegv; | 351 | goto give_sigsegv; |
352 | } | 352 | } |
353 | 353 | ||
354 | restorer = &__kernel_sigreturn; | 354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); |
355 | if (ka->sa.sa_flags & SA_RESTORER) | 355 | if (ka->sa.sa_flags & SA_RESTORER) |
356 | restorer = ka->sa.sa_restorer; | 356 | restorer = ka->sa.sa_restorer; |
357 | 357 | ||
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
447 | goto give_sigsegv; | 447 | goto give_sigsegv; |
448 | 448 | ||
449 | /* Set up to return from userspace. */ | 449 | /* Set up to return from userspace. */ |
450 | restorer = &__kernel_rt_sigreturn; | 450 | restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); |
451 | if (ka->sa.sa_flags & SA_RESTORER) | 451 | if (ka->sa.sa_flags & SA_RESTORER) |
452 | restorer = ka->sa.sa_restorer; | 452 | restorer = ka->sa.sa_restorer; |
453 | err |= __put_user(restorer, &frame->pretcode); | 453 | err |= __put_user(restorer, &frame->pretcode); |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0bada1870bdf..c60419dee018 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * linux/arch/i386/kernel/sysenter.c | 2 | * linux/arch/i386/kernel/sysenter.c |
3 | * | 3 | * |
4 | * (C) Copyright 2002 Linus Torvalds | 4 | * (C) Copyright 2002 Linus Torvalds |
5 | * Portions based on the vdso-randomization code from exec-shield: | ||
6 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | 7 | * |
6 | * This file contains the needed initializations to support sysenter. | 8 | * This file contains the needed initializations to support sysenter. |
7 | */ | 9 | */ |
@@ -13,12 +15,31 @@ | |||
13 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
14 | #include <linux/string.h> | 16 | #include <linux/string.h> |
15 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
18 | #include <linux/mm.h> | ||
19 | #include <linux/module.h> | ||
16 | 20 | ||
17 | #include <asm/cpufeature.h> | 21 | #include <asm/cpufeature.h> |
18 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
19 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
20 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
21 | 25 | ||
26 | /* | ||
27 | * Should the kernel map a VDSO page into processes and pass its | ||
28 | * address down to glibc upon exec()? | ||
29 | */ | ||
30 | unsigned int __read_mostly vdso_enabled = 1; | ||
31 | |||
32 | EXPORT_SYMBOL_GPL(vdso_enabled); | ||
33 | |||
34 | static int __init vdso_setup(char *s) | ||
35 | { | ||
36 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | |||
41 | __setup("vdso=", vdso_setup); | ||
42 | |||
22 | extern asmlinkage void sysenter_entry(void); | 43 | extern asmlinkage void sysenter_entry(void); |
23 | 44 | ||
24 | void enable_sep_cpu(void) | 45 | void enable_sep_cpu(void) |
@@ -45,23 +66,122 @@ void enable_sep_cpu(void) | |||
45 | */ | 66 | */ |
46 | extern const char vsyscall_int80_start, vsyscall_int80_end; | 67 | extern const char vsyscall_int80_start, vsyscall_int80_end; |
47 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 68 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
69 | static void *syscall_page; | ||
48 | 70 | ||
49 | int __init sysenter_setup(void) | 71 | int __init sysenter_setup(void) |
50 | { | 72 | { |
51 | void *page = (void *)get_zeroed_page(GFP_ATOMIC); | 73 | syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
52 | 74 | ||
53 | __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); | 75 | #ifdef CONFIG_COMPAT_VDSO |
76 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | ||
77 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
78 | #else | ||
79 | /* | ||
80 | * In the non-compat case the ELF coredumping code needs the fixmap: | ||
81 | */ | ||
82 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); | ||
83 | #endif | ||
54 | 84 | ||
55 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
56 | memcpy(page, | 86 | memcpy(syscall_page, |
57 | &vsyscall_int80_start, | 87 | &vsyscall_int80_start, |
58 | &vsyscall_int80_end - &vsyscall_int80_start); | 88 | &vsyscall_int80_end - &vsyscall_int80_start); |
59 | return 0; | 89 | return 0; |
60 | } | 90 | } |
61 | 91 | ||
62 | memcpy(page, | 92 | memcpy(syscall_page, |
63 | &vsyscall_sysenter_start, | 93 | &vsyscall_sysenter_start, |
64 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | 94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); |
65 | 95 | ||
66 | return 0; | 96 | return 0; |
67 | } | 97 | } |
98 | |||
99 | static struct page *syscall_nopage(struct vm_area_struct *vma, | ||
100 | unsigned long adr, int *type) | ||
101 | { | ||
102 | struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); | ||
103 | get_page(p); | ||
104 | return p; | ||
105 | } | ||
106 | |||
107 | /* Prevent VMA merging */ | ||
108 | static void syscall_vma_close(struct vm_area_struct *vma) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | static struct vm_operations_struct syscall_vm_ops = { | ||
113 | .close = syscall_vma_close, | ||
114 | .nopage = syscall_nopage, | ||
115 | }; | ||
116 | |||
117 | /* Defined in vsyscall-sysenter.S */ | ||
118 | extern void SYSENTER_RETURN; | ||
119 | |||
120 | /* Setup a VMA at program startup for the vsyscall page */ | ||
121 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
122 | { | ||
123 | struct vm_area_struct *vma; | ||
124 | struct mm_struct *mm = current->mm; | ||
125 | unsigned long addr; | ||
126 | int ret; | ||
127 | |||
128 | down_write(&mm->mmap_sem); | ||
129 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
130 | if (IS_ERR_VALUE(addr)) { | ||
131 | ret = addr; | ||
132 | goto up_fail; | ||
133 | } | ||
134 | |||
135 | vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); | ||
136 | if (!vma) { | ||
137 | ret = -ENOMEM; | ||
138 | goto up_fail; | ||
139 | } | ||
140 | |||
141 | vma->vm_start = addr; | ||
142 | vma->vm_end = addr + PAGE_SIZE; | ||
143 | /* MAYWRITE to allow gdb to COW and set breakpoints */ | ||
144 | vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; | ||
145 | vma->vm_flags |= mm->def_flags; | ||
146 | vma->vm_page_prot = protection_map[vma->vm_flags & 7]; | ||
147 | vma->vm_ops = &syscall_vm_ops; | ||
148 | vma->vm_mm = mm; | ||
149 | |||
150 | ret = insert_vm_struct(mm, vma); | ||
151 | if (ret) | ||
152 | goto free_vma; | ||
153 | |||
154 | current->mm->context.vdso = (void *)addr; | ||
155 | current_thread_info()->sysenter_return = | ||
156 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
157 | mm->total_vm++; | ||
158 | up_fail: | ||
159 | up_write(&mm->mmap_sem); | ||
160 | return ret; | ||
161 | |||
162 | free_vma: | ||
163 | kmem_cache_free(vm_area_cachep, vma); | ||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
168 | { | ||
169 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
170 | return "[vdso]"; | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
174 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | int in_gate_area(struct task_struct *task, unsigned long addr) | ||
180 | { | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | int in_gate_area_no_task(unsigned long addr) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 3b62baa6a371..1a36d26e15eb 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S | |||
@@ -42,10 +42,10 @@ __kernel_vsyscall: | |||
42 | /* 7: align return point with nop's to make disassembly easier */ | 42 | /* 7: align return point with nop's to make disassembly easier */ |
43 | .space 7,0x90 | 43 | .space 7,0x90 |
44 | 44 | ||
45 | /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ | 45 | /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ |
46 | jmp .Lenter_kernel | 46 | jmp .Lenter_kernel |
47 | /* 16: System call normal return point is here! */ | 47 | /* 16: System call normal return point is here! */ |
48 | .globl SYSENTER_RETURN /* Symbol used by entry.S. */ | 48 | .globl SYSENTER_RETURN /* Symbol used by sysenter.c */ |
49 | SYSENTER_RETURN: | 49 | SYSENTER_RETURN: |
50 | pop %ebp | 50 | pop %ebp |
51 | .Lpop_ebp: | 51 | .Lpop_ebp: |
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index 98699ca6e52d..e26975fc68b6 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | SECTIONS | 8 | SECTIONS |
9 | { | 9 | { |
10 | . = VSYSCALL_BASE + SIZEOF_HEADERS; | 10 | . = VDSO_PRELINK + SIZEOF_HEADERS; |
11 | 11 | ||
12 | .hash : { *(.hash) } :text | 12 | .hash : { *(.hash) } :text |
13 | .dynsym : { *(.dynsym) } | 13 | .dynsym : { *(.dynsym) } |
@@ -20,7 +20,7 @@ SECTIONS | |||
20 | For the layouts to match, we need to skip more than enough | 20 | For the layouts to match, we need to skip more than enough |
21 | space for the dynamic symbol table et al. If this amount | 21 | space for the dynamic symbol table et al. If this amount |
22 | is insufficient, ld -shared will barf. Just increase it here. */ | 22 | is insufficient, ld -shared will barf. Just increase it here. */ |
23 | . = VSYSCALL_BASE + 0x400; | 23 | . = VDSO_PRELINK + 0x400; |
24 | 24 | ||
25 | .text : { *(.text) } :text =0x90909090 | 25 | .text : { *(.text) } :text =0x90909090 |
26 | .note : { *(.note.*) } :text :note | 26 | .note : { *(.note.*) } :text :note |