author     Ingo Molnar <mingo@elte.hu>             2006-06-27 05:53:50 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-06-27 20:32:38 -0400
commit     e6e5494cb23d1933735ee47cc674ffe1c4afed6f
tree       c8945bb3ae5bec38693d801fb589d22d48d6f8eb
parent     d5fb34261dcd32c9cb3b28121fdc46308db513a1
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.

Besides the security implications, this feature also helps debuggers, which can COW a vma-backed VDSO just like a normal DSO and can thus do single-stepping and other debugging features.

It's good for hypervisors (Xen, VMware) too, which typically live in the same high-mapped address space as the VDSO, hence whenever the VDSO is used, they get lots of guest pagefaults and have to fix such guest accesses up - which slows things down instead of speeding things up (the primary purpose of the VDSO).

There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support for older glibcs that still rely on a prelinked high-mapped VDSO. Newer distributions (using glibc 2.3.3 or later) can turn this option off. Turning it off is also recommended for security reasons: attackers cannot use the predictable high-mapped VDSO page as a syscall trampoline anymore.

There is a new vdso=[0|1] boot option as well, and a runtime /proc/sys/vm/vdso_enabled sysctl switch, which allows the VDSO to be turned on/off.

(This version of the VDSO-randomization patch also has working ELF coredumping; the previous patch crashed in the coredumping code.)

This code is a combined work of the exec-shield VDSO randomization code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell started this patch and I completed it.

[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revert MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
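As a userspace illustration (not part of this patch): the randomized placement is visible through the AT_SYSINFO_EHDR auxv entry that ARCH_DLINFO passes to every exec'd binary. A minimal sketch, assuming a glibc new enough to provide getauxval() (2.16+; older systems would have to walk the auxv after envp by hand):

	/* vdso-where.c - print the vDSO base address handed down in the auxv */
	#include <stdio.h>
	#include <elf.h>        /* AT_SYSINFO_EHDR */
	#include <sys/auxv.h>   /* getauxval(), assumed available (glibc 2.16+) */

	int main(void)
	{
		unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

		if (!vdso) {
			puts("no vDSO advertised (vdso=0 or vdso_enabled=0?)");
			return 1;
		}
		/* With the vma-based vDSO this differs from run to run;
		 * with CONFIG_COMPAT_VDSO it stays at the fixed high mapping. */
		printf("vDSO mapped at %#lx\n", vdso);
		return 0;
	}

Running this a few times with CONFIG_COMPAT_VDSO=n should print a different address on each run, which is the point of moving the mapping into a vma.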
-rw-r--r--  Documentation/kernel-parameters.txt      4
-rw-r--r--  arch/i386/Kconfig                       11
-rw-r--r--  arch/i386/kernel/asm-offsets.c           4
-rw-r--r--  arch/i386/kernel/entry.S                 7
-rw-r--r--  arch/i386/kernel/signal.c                4
-rw-r--r--  arch/i386/kernel/sysenter.c            128
-rw-r--r--  arch/i386/kernel/vsyscall-sysenter.S     4
-rw-r--r--  arch/i386/kernel/vsyscall.lds.S          4
-rw-r--r--  fs/proc/task_mmu.c                      30
-rw-r--r--  include/asm-i386/elf.h                  53
-rw-r--r--  include/asm-i386/fixmap.h               10
-rw-r--r--  include/asm-i386/mmu.h                   1
-rw-r--r--  include/asm-i386/page.h                  3
-rw-r--r--  include/asm-i386/thread_info.h           1
-rw-r--r--  include/asm-i386/unwind.h                4
-rw-r--r--  include/linux/mm.h                       2
-rw-r--r--  include/linux/sysctl.h                   1
-rw-r--r--  kernel/sysctl.c                         12
18 files changed, 235 insertions(+), 48 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2e352a605fcf..0d189c93eeaf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1669,6 +1669,10 @@ running once the system is up.
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
 
+	vdso=		[IA-32]
+			vdso=1: enable VDSO (default)
+			vdso=0: disable VDSO mapping
+
 	video=		[FB] Frame buffer configuration
 			See Documentation/fb/modedb.txt.
 
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 6662f8c44798..3bb221db164a 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -780,6 +780,17 @@ config HOTPLUG_CPU
 	  enable suspend on SMP systems. CPUs can be controlled through
 	  /sys/devices/system/cpu.
 
+config COMPAT_VDSO
+	bool "Compat VDSO support"
+	default y
+	help
+	  Map the VDSO to the predictable old-style address too.
+	---help---
+	  Say N here if you are running a sufficiently recent glibc
+	  version (2.3.3 or later), to remove the high-mapped
+	  VDSO mapping and to exclusively use the randomized VDSO.
+
+	  If unsure, say Y.
+
 endmenu
 
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1c3a809e6421..c80271f8f084 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -14,6 +14,7 @@
 #include <asm/fixmap.h>
 #include <asm/processor.h>
 #include <asm/thread_info.h>
+#include <asm/elf.h>
 
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -54,6 +55,7 @@ void foo(void)
 	OFFSET(TI_preempt_count, thread_info, preempt_count);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
 	OFFSET(TI_restart_block, thread_info, restart_block);
+	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
 	BLANK();
 
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
@@ -69,7 +71,7 @@ void foo(void)
 	       sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+	DEFINE(VDSO_PRELINK, VDSO_PRELINK);
 
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 }
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index e8d2630fd19a..fbdb933251b6 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -270,7 +270,12 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET cs, 0*/
-	pushl $SYSENTER_RETURN
+	/*
+	 * Push current_thread_info()->sysenter_return to the stack.
+	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
+	 */
+	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 5c352c3a9e7f..43002cfb40c4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 		goto give_sigsegv;
 	}
 
-	restorer = &__kernel_sigreturn;
+	restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		goto give_sigsegv;
 
 	/* Set up to return from userspace.  */
-	restorer = &__kernel_rt_sigreturn;
+	restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn);
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 	err |= __put_user(restorer, &frame->pretcode);
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 0bada1870bdf..c60419dee018 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -2,6 +2,8 @@
  * linux/arch/i386/kernel/sysenter.c
  *
  * (C) Copyright 2002 Linus Torvalds
+ * Portions based on the vdso-randomization code from exec-shield:
+ * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
  *
  * This file contains the needed initializations to support sysenter.
  */
@@ -13,12 +15,31 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/module.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+EXPORT_SYMBOL_GPL(vdso_enabled);
+
+static int __init vdso_setup(char *s)
+{
+	vdso_enabled = simple_strtoul(s, NULL, 0);
+
+	return 1;
+}
+
+__setup("vdso=", vdso_setup);
+
 extern asmlinkage void sysenter_entry(void);
 
 void enable_sep_cpu(void)
@@ -45,23 +66,122 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
+	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
-	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+#ifdef CONFIG_COMPAT_VDSO
+	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
+	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+#else
+	/*
+	 * In the non-compat case the ELF coredumping code needs the fixmap:
+	 */
+	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
+#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		memcpy(page,
+		memcpy(syscall_page,
 		       &vsyscall_int80_start,
 		       &vsyscall_int80_end - &vsyscall_int80_start);
 		return 0;
 	}
 
-	memcpy(page,
+	memcpy(syscall_page,
 	       &vsyscall_sysenter_start,
 	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
 	return 0;
 }
+
+static struct page *syscall_nopage(struct vm_area_struct *vma,
+				unsigned long adr, int *type)
+{
+	struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+	get_page(p);
+	return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+	.close = syscall_vma_close,
+	.nopage = syscall_nopage,
+};
+
+/* Defined in vsyscall-sysenter.S */
+extern void SYSENTER_RETURN;
+
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		ret = -ENOMEM;
+		goto up_fail;
+	}
+
+	vma->vm_start = addr;
+	vma->vm_end = addr + PAGE_SIZE;
+	/* MAYWRITE to allow gdb to COW and set breakpoints */
+	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_ops = &syscall_vm_ops;
+	vma->vm_mm = mm;
+
+	ret = insert_vm_struct(mm, vma);
+	if (ret)
+		goto free_vma;
+
+	current->mm->context.vdso = (void *)addr;
+	current_thread_info()->sysenter_return =
+				    (void *)VDSO_SYM(&SYSENTER_RETURN);
+	mm->total_vm++;
+up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+	return ret;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+		return "[vdso]";
+	return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+	return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+	return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+	return 0;
+}
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
index 3b62baa6a371..1a36d26e15eb 100644
--- a/arch/i386/kernel/vsyscall-sysenter.S
+++ b/arch/i386/kernel/vsyscall-sysenter.S
@@ -42,10 +42,10 @@ __kernel_vsyscall:
 	/* 7: align return point with nop's to make disassembly easier */
 	.space 7,0x90
 
-	/* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
 	jmp .Lenter_kernel
 	/* 16: System call normal return point is here! */
-	.globl SYSENTER_RETURN	/* Symbol used by entry.S. */
+	.globl SYSENTER_RETURN	/* Symbol used by sysenter.c */
 SYSENTER_RETURN:
 	pop %ebp
 .Lpop_ebp:
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S
index 98699ca6e52d..e26975fc68b6 100644
--- a/arch/i386/kernel/vsyscall.lds.S
+++ b/arch/i386/kernel/vsyscall.lds.S
@@ -7,7 +7,7 @@
 
 SECTIONS
 {
-  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+  . = VDSO_PRELINK + SIZEOF_HEADERS;
 
   .hash           : { *(.hash) }	:text
   .dynsym         : { *(.dynsym) }
@@ -20,7 +20,7 @@ SECTIONS
      For the layouts to match, we need to skip more than enough
      space for the dynamic symbol table et al.  If this amount
      is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VSYSCALL_BASE + 0x400;
+  . = VDSO_PRELINK + 0x400;
 
   .text           : { *(.text) }	:text =0x90909090
   .note		  : { *(.note.*) }	:text :note
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0137ec4c1368..0a163a4f7764 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,6 +122,11 @@ struct mem_size_stats
 	unsigned long private_dirty;
 };
 
+__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+
 static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
 {
 	struct proc_maps_private *priv = m->private;
@@ -158,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
 		pad_len_spaces(m, len);
 		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
 	} else {
-		if (mm) {
-			if (vma->vm_start <= mm->start_brk &&
+		const char *name = arch_vma_name(vma);
+		if (!name) {
+			if (mm) {
+				if (vma->vm_start <= mm->start_brk &&
 						vma->vm_end >= mm->brk) {
-				pad_len_spaces(m, len);
-				seq_puts(m, "[heap]");
-			} else {
-				if (vma->vm_start <= mm->start_stack &&
-					vma->vm_end >= mm->start_stack) {
-
-					pad_len_spaces(m, len);
-					seq_puts(m, "[stack]");
+					name = "[heap]";
+				} else if (vma->vm_start <= mm->start_stack &&
+					   vma->vm_end >= mm->start_stack) {
+					name = "[stack]";
 				}
+			} else {
+				name = "[vdso]";
 			}
-		} else {
+		}
+		if (name) {
 			pad_len_spaces(m, len);
-			seq_puts(m, "[vdso]");
+			seq_puts(m, name);
 		}
 	}
 	seq_putc(m, '\n');
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 4153d80e4d2b..1eac92cb5b16 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/system.h>		/* for savesegment */
 #include <asm/auxvec.h>
+#include <asm/desc.h>
 
 #include <linux/utsname.h>
 
@@ -129,15 +130,41 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct
 #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
 
-#define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
-#define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
-#define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
+#define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
+#define VDSO_BASE		((unsigned long)current->mm->context.vdso)
+
+#ifdef CONFIG_COMPAT_VDSO
+# define VDSO_COMPAT_BASE	VDSO_HIGH_BASE
+# define VDSO_PRELINK		VDSO_HIGH_BASE
+#else
+# define VDSO_COMPAT_BASE	VDSO_BASE
+# define VDSO_PRELINK		0
+#endif
+
+#define VDSO_COMPAT_SYM(x) \
+		(VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
+
+#define VDSO_SYM(x) \
+		(VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK)
+
+#define VDSO_HIGH_EHDR	((const struct elfhdr *) VDSO_HIGH_BASE)
+#define VDSO_EHDR	((const struct elfhdr *) VDSO_COMPAT_BASE)
+
 extern void __kernel_vsyscall;
 
+#define VDSO_ENTRY	VDSO_SYM(&__kernel_vsyscall)
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int executable_stack);
+
+extern unsigned int vdso_enabled;
+
 #define ARCH_DLINFO \
-do { \
-	NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY); \
-	NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
+do if (vdso_enabled) { \
+		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY); \
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \
 } while (0)
 
 /*
@@ -148,15 +175,15 @@ do { \
  * Dumping its extra ELF program headers includes all the other information
  * a debugger needs to easily find how the vsyscall DSO was being used.
  */
-#define ELF_CORE_EXTRA_PHDRS		(VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_EXTRA_PHDRS		(VDSO_HIGH_EHDR->e_phnum)
 #define ELF_CORE_WRITE_EXTRA_PHDRS					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VSYSCALL_BASE		      \
-					   + VSYSCALL_EHDR->e_phoff);	      \
+		(const struct elf_phdr *) (VDSO_HIGH_BASE		      \
+					   + VDSO_HIGH_EHDR->e_phoff);	      \
 	int i;								      \
 	Elf32_Off ofs = 0;						      \
-	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
+	for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) {			      \
 		struct elf_phdr phdr = vsyscall_phdrs[i];		      \
 		if (phdr.p_type == PT_LOAD) {				      \
 			BUG_ON(ofs != 0);				      \
@@ -174,10 +201,10 @@ do { \
 #define ELF_CORE_WRITE_EXTRA_DATA					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VSYSCALL_BASE		      \
-					   + VSYSCALL_EHDR->e_phoff);	      \
+		(const struct elf_phdr *) (VDSO_HIGH_BASE		      \
+					   + VDSO_HIGH_EHDR->e_phoff);	      \
 	int i;								      \
-	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
+	for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) {			      \
 		if (vsyscall_phdrs[i].p_type == PT_LOAD)		      \
 			DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,	      \
 				   PAGE_ALIGN(vsyscall_phdrs[i].p_memsz));    \
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index f7e068f4d2f9..a48cc3f7ccc6 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -51,7 +51,7 @@
  */
 enum fixed_addresses {
 	FIX_HOLE,
-	FIX_VSYSCALL,
+	FIX_VDSO,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -115,14 +115,6 @@ extern void __set_fixmap (enum fixed_addresses idx,
 #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
 
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
-
-
 extern void __this_fixmap_does_not_exist(void);
 
 /*
diff --git a/include/asm-i386/mmu.h b/include/asm-i386/mmu.h
index f431a0b86d4c..8358dd3df7aa 100644
--- a/include/asm-i386/mmu.h
+++ b/include/asm-i386/mmu.h
@@ -12,6 +12,7 @@ typedef struct {
 	int size;
 	struct semaphore sem;
 	void *ldt;
+	void *vdso;
 } mm_context_t;
 
 #endif
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index e3a552fa5538..f5bf544c729a 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -96,6 +96,8 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 
 #ifndef __ASSEMBLY__
 
+struct vm_area_struct;
+
 /*
  * This much address space is reserved for vmalloc() and iomap()
  * as well as fixmap mappings.
@@ -139,6 +141,7 @@ extern int page_is_ram(unsigned long pagenr);
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
+#define __HAVE_ARCH_GATE_AREA 1
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index ff1e2b1a7c84..2833fa2c0dd0 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -37,6 +37,7 @@ struct thread_info {
 					   0-0xBFFFFFFF for user-thead
 					   0-0xFFFFFFFF for kernel-thread
 					*/
+	void *sysenter_return;
 	struct restart_block    restart_block;
 
 	unsigned long           previous_esp;   /* ESP of the previous stack in case
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index d480f2e38215..69f0f1df6722 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -78,8 +78,8 @@ static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
 	return user_mode_vm(&info->regs);
 #else
 	return info->regs.eip < PAGE_OFFSET
-	       || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL)
-	           && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE)
+	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
+	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
 	       || info->regs.esp < PAGE_OFFSET;
 #endif
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a929ea197e48..ff1fa87df8d0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1065,5 +1065,7 @@ void drop_slab(void);
 extern int randomize_va_space;
 #endif
 
+const char *arch_vma_name(struct vm_area_struct *vma);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 349ef908a222..bee12a7a0576 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -189,6 +189,7 @@ enum
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
+	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f1a4eb1a655e..f54afed8426f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -928,6 +928,18 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_X86_32
+	{
+		.ctl_name	= VM_VDSO_ENABLED,
+		.procname	= "vdso_enabled",
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(vdso_enabled),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
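A throwaway sketch (not from the patch) of exercising the new vm.vdso_enabled switch from userspace; it is equivalent to echo 0 > /proc/sys/vm/vdso_enabled, needs root, and only affects binaries exec'd afterwards, since ARCH_DLINFO consults vdso_enabled at exec time:

	/* vdso-toggle.c - write 0 or 1 to the vdso_enabled sysctl added above */
	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		const char *val = (argc > 1) ? argv[1] : "1";	/* "0" or "1" */
		FILE *f = fopen("/proc/sys/vm/vdso_enabled", "w");

		if (!f) {
			perror("/proc/sys/vm/vdso_enabled");
			return EXIT_FAILURE;
		}
		fprintf(f, "%s\n", val);
		return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
	}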