diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-06-27 05:53:50 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-27 20:32:38 -0400 |
commit | e6e5494cb23d1933735ee47cc674ffe1c4afed6f (patch) | |
tree | c8945bb3ae5bec38693d801fb589d22d48d6f8eb | |
parent | d5fb34261dcd32c9cb3b28121fdc46308db513a1 (diff) |
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.
Besides the security implications, this feature also helps debuggers, which
can COW a vma-backed VDSO just like a normal DSO and can thus do
single-stepping and other debugging features.
It's good for hypervisors (Xen, VMWare) too, which typically live in the same
high-mapped address space as the VDSO, hence whenever the VDSO is used, they
get lots of guest pagefaults and have to fix such guest accesses up - which
slows things down instead of speeding things up (the primary purpose of the
VDSO).
There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support
for older glibcs that still rely on a prelinked high-mapped VDSO. Newer
distributions (using glibc 2.3.3 or later) can turn this option off. Turning
it off is also recommended for security reasons: attackers cannot use the
predictable high-mapped VDSO page as a syscall trampoline anymore.
There is a new vdso=[0|1] boot option as well, and a runtime
/proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned
on/off.
(This version of the VDSO-randomization patch also has working ELF
coredumping; the previous patch crashed in the coredumping code.)
This code is a combined work of the exec-shield VDSO randomization
code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell
started this patch and I completed it.
[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revert MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/kernel-parameters.txt | 4 | ||||
-rw-r--r-- | arch/i386/Kconfig | 11 | ||||
-rw-r--r-- | arch/i386/kernel/asm-offsets.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 7 | ||||
-rw-r--r-- | arch/i386/kernel/signal.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/sysenter.c | 128 | ||||
-rw-r--r-- | arch/i386/kernel/vsyscall-sysenter.S | 4 | ||||
-rw-r--r-- | arch/i386/kernel/vsyscall.lds.S | 4 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 30 | ||||
-rw-r--r-- | include/asm-i386/elf.h | 53 | ||||
-rw-r--r-- | include/asm-i386/fixmap.h | 10 | ||||
-rw-r--r-- | include/asm-i386/mmu.h | 1 | ||||
-rw-r--r-- | include/asm-i386/page.h | 3 | ||||
-rw-r--r-- | include/asm-i386/thread_info.h | 1 | ||||
-rw-r--r-- | include/asm-i386/unwind.h | 4 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | kernel/sysctl.c | 12 |
18 files changed, 235 insertions, 48 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2e352a605fcf..0d189c93eeaf 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1669,6 +1669,10 @@ running once the system is up. | |||
1669 | usbhid.mousepoll= | 1669 | usbhid.mousepoll= |
1670 | [USBHID] The interval which mice are to be polled at. | 1670 | [USBHID] The interval which mice are to be polled at. |
1671 | 1671 | ||
1672 | vdso= [IA-32] | ||
1673 | vdso=1: enable VDSO (default) | ||
1674 | vdso=0: disable VDSO mapping | ||
1675 | |||
1672 | video= [FB] Frame buffer configuration | 1676 | video= [FB] Frame buffer configuration |
1673 | See Documentation/fb/modedb.txt. | 1677 | See Documentation/fb/modedb.txt. |
1674 | 1678 | ||
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6662f8c44798..3bb221db164a 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -780,6 +780,17 @@ config HOTPLUG_CPU | |||
780 | enable suspend on SMP systems. CPUs can be controlled through | 780 | enable suspend on SMP systems. CPUs can be controlled through |
781 | /sys/devices/system/cpu. | 781 | /sys/devices/system/cpu. |
782 | 782 | ||
783 | config COMPAT_VDSO | ||
784 | bool "Compat VDSO support" | ||
785 | default y | ||
786 | help | ||
787 | Map the VDSO to the predictable old-style address too. | ||
788 | ---help--- | ||
789 | Say N here if you are running a sufficiently recent glibc | ||
790 | version (2.3.3 or later), to remove the high-mapped | ||
791 | VDSO mapping and to exclusively use the randomized VDSO. | ||
792 | |||
793 | If unsure, say Y. | ||
783 | 794 | ||
784 | endmenu | 795 | endmenu |
785 | 796 | ||
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1c3a809e6421..c80271f8f084 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/fixmap.h> | 14 | #include <asm/fixmap.h> |
15 | #include <asm/processor.h> | 15 | #include <asm/processor.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/elf.h> | ||
17 | 18 | ||
18 | #define DEFINE(sym, val) \ | 19 | #define DEFINE(sym, val) \ |
19 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 20 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
@@ -54,6 +55,7 @@ void foo(void) | |||
54 | OFFSET(TI_preempt_count, thread_info, preempt_count); | 55 | OFFSET(TI_preempt_count, thread_info, preempt_count); |
55 | OFFSET(TI_addr_limit, thread_info, addr_limit); | 56 | OFFSET(TI_addr_limit, thread_info, addr_limit); |
56 | OFFSET(TI_restart_block, thread_info, restart_block); | 57 | OFFSET(TI_restart_block, thread_info, restart_block); |
58 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); | ||
57 | BLANK(); | 59 | BLANK(); |
58 | 60 | ||
59 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | 61 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); |
@@ -69,7 +71,7 @@ void foo(void) | |||
69 | sizeof(struct tss_struct)); | 71 | sizeof(struct tss_struct)); |
70 | 72 | ||
71 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 73 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
72 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); | 74 | DEFINE(VDSO_PRELINK, VDSO_PRELINK); |
73 | 75 | ||
74 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 76 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
75 | } | 77 | } |
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e8d2630fd19a..fbdb933251b6 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -270,7 +270,12 @@ sysenter_past_esp: | |||
270 | pushl $(__USER_CS) | 270 | pushl $(__USER_CS) |
271 | CFI_ADJUST_CFA_OFFSET 4 | 271 | CFI_ADJUST_CFA_OFFSET 4 |
272 | /*CFI_REL_OFFSET cs, 0*/ | 272 | /*CFI_REL_OFFSET cs, 0*/ |
273 | pushl $SYSENTER_RETURN | 273 | /* |
274 | * Push current_thread_info()->sysenter_return to the stack. | ||
275 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | ||
276 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | ||
277 | */ | ||
278 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | ||
274 | CFI_ADJUST_CFA_OFFSET 4 | 279 | CFI_ADJUST_CFA_OFFSET 4 |
275 | CFI_REL_OFFSET eip, 0 | 280 | CFI_REL_OFFSET eip, 0 |
276 | 281 | ||
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 5c352c3a9e7f..43002cfb40c4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
351 | goto give_sigsegv; | 351 | goto give_sigsegv; |
352 | } | 352 | } |
353 | 353 | ||
354 | restorer = &__kernel_sigreturn; | 354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); |
355 | if (ka->sa.sa_flags & SA_RESTORER) | 355 | if (ka->sa.sa_flags & SA_RESTORER) |
356 | restorer = ka->sa.sa_restorer; | 356 | restorer = ka->sa.sa_restorer; |
357 | 357 | ||
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
447 | goto give_sigsegv; | 447 | goto give_sigsegv; |
448 | 448 | ||
449 | /* Set up to return from userspace. */ | 449 | /* Set up to return from userspace. */ |
450 | restorer = &__kernel_rt_sigreturn; | 450 | restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); |
451 | if (ka->sa.sa_flags & SA_RESTORER) | 451 | if (ka->sa.sa_flags & SA_RESTORER) |
452 | restorer = ka->sa.sa_restorer; | 452 | restorer = ka->sa.sa_restorer; |
453 | err |= __put_user(restorer, &frame->pretcode); | 453 | err |= __put_user(restorer, &frame->pretcode); |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 0bada1870bdf..c60419dee018 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * linux/arch/i386/kernel/sysenter.c | 2 | * linux/arch/i386/kernel/sysenter.c |
3 | * | 3 | * |
4 | * (C) Copyright 2002 Linus Torvalds | 4 | * (C) Copyright 2002 Linus Torvalds |
5 | * Portions based on the vdso-randomization code from exec-shield: | ||
6 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar | ||
5 | * | 7 | * |
6 | * This file contains the needed initializations to support sysenter. | 8 | * This file contains the needed initializations to support sysenter. |
7 | */ | 9 | */ |
@@ -13,12 +15,31 @@ | |||
13 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
14 | #include <linux/string.h> | 16 | #include <linux/string.h> |
15 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
18 | #include <linux/mm.h> | ||
19 | #include <linux/module.h> | ||
16 | 20 | ||
17 | #include <asm/cpufeature.h> | 21 | #include <asm/cpufeature.h> |
18 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
19 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
20 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
21 | 25 | ||
26 | /* | ||
27 | * Should the kernel map a VDSO page into processes and pass its | ||
28 | * address down to glibc upon exec()? | ||
29 | */ | ||
30 | unsigned int __read_mostly vdso_enabled = 1; | ||
31 | |||
32 | EXPORT_SYMBOL_GPL(vdso_enabled); | ||
33 | |||
34 | static int __init vdso_setup(char *s) | ||
35 | { | ||
36 | vdso_enabled = simple_strtoul(s, NULL, 0); | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | |||
41 | __setup("vdso=", vdso_setup); | ||
42 | |||
22 | extern asmlinkage void sysenter_entry(void); | 43 | extern asmlinkage void sysenter_entry(void); |
23 | 44 | ||
24 | void enable_sep_cpu(void) | 45 | void enable_sep_cpu(void) |
@@ -45,23 +66,122 @@ void enable_sep_cpu(void) | |||
45 | */ | 66 | */ |
46 | extern const char vsyscall_int80_start, vsyscall_int80_end; | 67 | extern const char vsyscall_int80_start, vsyscall_int80_end; |
47 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 68 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
69 | static void *syscall_page; | ||
48 | 70 | ||
49 | int __init sysenter_setup(void) | 71 | int __init sysenter_setup(void) |
50 | { | 72 | { |
51 | void *page = (void *)get_zeroed_page(GFP_ATOMIC); | 73 | syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
52 | 74 | ||
53 | __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); | 75 | #ifdef CONFIG_COMPAT_VDSO |
76 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | ||
77 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | ||
78 | #else | ||
79 | /* | ||
80 | * In the non-compat case the ELF coredumping code needs the fixmap: | ||
81 | */ | ||
82 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO); | ||
83 | #endif | ||
54 | 84 | ||
55 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
56 | memcpy(page, | 86 | memcpy(syscall_page, |
57 | &vsyscall_int80_start, | 87 | &vsyscall_int80_start, |
58 | &vsyscall_int80_end - &vsyscall_int80_start); | 88 | &vsyscall_int80_end - &vsyscall_int80_start); |
59 | return 0; | 89 | return 0; |
60 | } | 90 | } |
61 | 91 | ||
62 | memcpy(page, | 92 | memcpy(syscall_page, |
63 | &vsyscall_sysenter_start, | 93 | &vsyscall_sysenter_start, |
64 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | 94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); |
65 | 95 | ||
66 | return 0; | 96 | return 0; |
67 | } | 97 | } |
98 | |||
99 | static struct page *syscall_nopage(struct vm_area_struct *vma, | ||
100 | unsigned long adr, int *type) | ||
101 | { | ||
102 | struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); | ||
103 | get_page(p); | ||
104 | return p; | ||
105 | } | ||
106 | |||
107 | /* Prevent VMA merging */ | ||
108 | static void syscall_vma_close(struct vm_area_struct *vma) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | static struct vm_operations_struct syscall_vm_ops = { | ||
113 | .close = syscall_vma_close, | ||
114 | .nopage = syscall_nopage, | ||
115 | }; | ||
116 | |||
117 | /* Defined in vsyscall-sysenter.S */ | ||
118 | extern void SYSENTER_RETURN; | ||
119 | |||
120 | /* Setup a VMA at program startup for the vsyscall page */ | ||
121 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
122 | { | ||
123 | struct vm_area_struct *vma; | ||
124 | struct mm_struct *mm = current->mm; | ||
125 | unsigned long addr; | ||
126 | int ret; | ||
127 | |||
128 | down_write(&mm->mmap_sem); | ||
129 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
130 | if (IS_ERR_VALUE(addr)) { | ||
131 | ret = addr; | ||
132 | goto up_fail; | ||
133 | } | ||
134 | |||
135 | vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); | ||
136 | if (!vma) { | ||
137 | ret = -ENOMEM; | ||
138 | goto up_fail; | ||
139 | } | ||
140 | |||
141 | vma->vm_start = addr; | ||
142 | vma->vm_end = addr + PAGE_SIZE; | ||
143 | /* MAYWRITE to allow gdb to COW and set breakpoints */ | ||
144 | vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; | ||
145 | vma->vm_flags |= mm->def_flags; | ||
146 | vma->vm_page_prot = protection_map[vma->vm_flags & 7]; | ||
147 | vma->vm_ops = &syscall_vm_ops; | ||
148 | vma->vm_mm = mm; | ||
149 | |||
150 | ret = insert_vm_struct(mm, vma); | ||
151 | if (ret) | ||
152 | goto free_vma; | ||
153 | |||
154 | current->mm->context.vdso = (void *)addr; | ||
155 | current_thread_info()->sysenter_return = | ||
156 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
157 | mm->total_vm++; | ||
158 | up_fail: | ||
159 | up_write(&mm->mmap_sem); | ||
160 | return ret; | ||
161 | |||
162 | free_vma: | ||
163 | kmem_cache_free(vm_area_cachep, vma); | ||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | const char *arch_vma_name(struct vm_area_struct *vma) | ||
168 | { | ||
169 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) | ||
170 | return "[vdso]"; | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
174 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | int in_gate_area(struct task_struct *task, unsigned long addr) | ||
180 | { | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | int in_gate_area_no_task(unsigned long addr) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 3b62baa6a371..1a36d26e15eb 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S | |||
@@ -42,10 +42,10 @@ __kernel_vsyscall: | |||
42 | /* 7: align return point with nop's to make disassembly easier */ | 42 | /* 7: align return point with nop's to make disassembly easier */ |
43 | .space 7,0x90 | 43 | .space 7,0x90 |
44 | 44 | ||
45 | /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ | 45 | /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ |
46 | jmp .Lenter_kernel | 46 | jmp .Lenter_kernel |
47 | /* 16: System call normal return point is here! */ | 47 | /* 16: System call normal return point is here! */ |
48 | .globl SYSENTER_RETURN /* Symbol used by entry.S. */ | 48 | .globl SYSENTER_RETURN /* Symbol used by sysenter.c */ |
49 | SYSENTER_RETURN: | 49 | SYSENTER_RETURN: |
50 | pop %ebp | 50 | pop %ebp |
51 | .Lpop_ebp: | 51 | .Lpop_ebp: |
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index 98699ca6e52d..e26975fc68b6 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | SECTIONS | 8 | SECTIONS |
9 | { | 9 | { |
10 | . = VSYSCALL_BASE + SIZEOF_HEADERS; | 10 | . = VDSO_PRELINK + SIZEOF_HEADERS; |
11 | 11 | ||
12 | .hash : { *(.hash) } :text | 12 | .hash : { *(.hash) } :text |
13 | .dynsym : { *(.dynsym) } | 13 | .dynsym : { *(.dynsym) } |
@@ -20,7 +20,7 @@ SECTIONS | |||
20 | For the layouts to match, we need to skip more than enough | 20 | For the layouts to match, we need to skip more than enough |
21 | space for the dynamic symbol table et al. If this amount | 21 | space for the dynamic symbol table et al. If this amount |
22 | is insufficient, ld -shared will barf. Just increase it here. */ | 22 | is insufficient, ld -shared will barf. Just increase it here. */ |
23 | . = VSYSCALL_BASE + 0x400; | 23 | . = VDSO_PRELINK + 0x400; |
24 | 24 | ||
25 | .text : { *(.text) } :text =0x90909090 | 25 | .text : { *(.text) } :text =0x90909090 |
26 | .note : { *(.note.*) } :text :note | 26 | .note : { *(.note.*) } :text :note |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 0137ec4c1368..0a163a4f7764 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -122,6 +122,11 @@ struct mem_size_stats | |||
122 | unsigned long private_dirty; | 122 | unsigned long private_dirty; |
123 | }; | 123 | }; |
124 | 124 | ||
125 | __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma) | ||
126 | { | ||
127 | return NULL; | ||
128 | } | ||
129 | |||
125 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) | 130 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) |
126 | { | 131 | { |
127 | struct proc_maps_private *priv = m->private; | 132 | struct proc_maps_private *priv = m->private; |
@@ -158,22 +163,23 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats | |||
158 | pad_len_spaces(m, len); | 163 | pad_len_spaces(m, len); |
159 | seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); | 164 | seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); |
160 | } else { | 165 | } else { |
161 | if (mm) { | 166 | const char *name = arch_vma_name(vma); |
162 | if (vma->vm_start <= mm->start_brk && | 167 | if (!name) { |
168 | if (mm) { | ||
169 | if (vma->vm_start <= mm->start_brk && | ||
163 | vma->vm_end >= mm->brk) { | 170 | vma->vm_end >= mm->brk) { |
164 | pad_len_spaces(m, len); | 171 | name = "[heap]"; |
165 | seq_puts(m, "[heap]"); | 172 | } else if (vma->vm_start <= mm->start_stack && |
166 | } else { | 173 | vma->vm_end >= mm->start_stack) { |
167 | if (vma->vm_start <= mm->start_stack && | 174 | name = "[stack]"; |
168 | vma->vm_end >= mm->start_stack) { | ||
169 | |||
170 | pad_len_spaces(m, len); | ||
171 | seq_puts(m, "[stack]"); | ||
172 | } | 175 | } |
176 | } else { | ||
177 | name = "[vdso]"; | ||
173 | } | 178 | } |
174 | } else { | 179 | } |
180 | if (name) { | ||
175 | pad_len_spaces(m, len); | 181 | pad_len_spaces(m, len); |
176 | seq_puts(m, "[vdso]"); | 182 | seq_puts(m, name); |
177 | } | 183 | } |
178 | } | 184 | } |
179 | seq_putc(m, '\n'); | 185 | seq_putc(m, '\n'); |
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 4153d80e4d2b..1eac92cb5b16 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/system.h> /* for savesegment */ | 11 | #include <asm/system.h> /* for savesegment */ |
12 | #include <asm/auxvec.h> | 12 | #include <asm/auxvec.h> |
13 | #include <asm/desc.h> | ||
13 | 14 | ||
14 | #include <linux/utsname.h> | 15 | #include <linux/utsname.h> |
15 | 16 | ||
@@ -129,15 +130,41 @@ extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct | |||
129 | #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) | 130 | #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) |
130 | #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) | 131 | #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) |
131 | 132 | ||
132 | #define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) | 133 | #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) |
133 | #define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) | 134 | #define VDSO_BASE ((unsigned long)current->mm->context.vdso) |
134 | #define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) | 135 | |
136 | #ifdef CONFIG_COMPAT_VDSO | ||
137 | # define VDSO_COMPAT_BASE VDSO_HIGH_BASE | ||
138 | # define VDSO_PRELINK VDSO_HIGH_BASE | ||
139 | #else | ||
140 | # define VDSO_COMPAT_BASE VDSO_BASE | ||
141 | # define VDSO_PRELINK 0 | ||
142 | #endif | ||
143 | |||
144 | #define VDSO_COMPAT_SYM(x) \ | ||
145 | (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) | ||
146 | |||
147 | #define VDSO_SYM(x) \ | ||
148 | (VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK) | ||
149 | |||
150 | #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) | ||
151 | #define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) | ||
152 | |||
135 | extern void __kernel_vsyscall; | 153 | extern void __kernel_vsyscall; |
136 | 154 | ||
155 | #define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) | ||
156 | |||
157 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES | ||
158 | struct linux_binprm; | ||
159 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | ||
160 | int executable_stack); | ||
161 | |||
162 | extern unsigned int vdso_enabled; | ||
163 | |||
137 | #define ARCH_DLINFO \ | 164 | #define ARCH_DLINFO \ |
138 | do { \ | 165 | do if (vdso_enabled) { \ |
139 | NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ | 166 | NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ |
140 | NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ | 167 | NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ |
141 | } while (0) | 168 | } while (0) |
142 | 169 | ||
143 | /* | 170 | /* |
@@ -148,15 +175,15 @@ do { \ | |||
148 | * Dumping its extra ELF program headers includes all the other information | 175 | * Dumping its extra ELF program headers includes all the other information |
149 | * a debugger needs to easily find how the vsyscall DSO was being used. | 176 | * a debugger needs to easily find how the vsyscall DSO was being used. |
150 | */ | 177 | */ |
151 | #define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) | 178 | #define ELF_CORE_EXTRA_PHDRS (VDSO_HIGH_EHDR->e_phnum) |
152 | #define ELF_CORE_WRITE_EXTRA_PHDRS \ | 179 | #define ELF_CORE_WRITE_EXTRA_PHDRS \ |
153 | do { \ | 180 | do { \ |
154 | const struct elf_phdr *const vsyscall_phdrs = \ | 181 | const struct elf_phdr *const vsyscall_phdrs = \ |
155 | (const struct elf_phdr *) (VSYSCALL_BASE \ | 182 | (const struct elf_phdr *) (VDSO_HIGH_BASE \ |
156 | + VSYSCALL_EHDR->e_phoff); \ | 183 | + VDSO_HIGH_EHDR->e_phoff); \ |
157 | int i; \ | 184 | int i; \ |
158 | Elf32_Off ofs = 0; \ | 185 | Elf32_Off ofs = 0; \ |
159 | for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ | 186 | for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \ |
160 | struct elf_phdr phdr = vsyscall_phdrs[i]; \ | 187 | struct elf_phdr phdr = vsyscall_phdrs[i]; \ |
161 | if (phdr.p_type == PT_LOAD) { \ | 188 | if (phdr.p_type == PT_LOAD) { \ |
162 | BUG_ON(ofs != 0); \ | 189 | BUG_ON(ofs != 0); \ |
@@ -174,10 +201,10 @@ do { \ | |||
174 | #define ELF_CORE_WRITE_EXTRA_DATA \ | 201 | #define ELF_CORE_WRITE_EXTRA_DATA \ |
175 | do { \ | 202 | do { \ |
176 | const struct elf_phdr *const vsyscall_phdrs = \ | 203 | const struct elf_phdr *const vsyscall_phdrs = \ |
177 | (const struct elf_phdr *) (VSYSCALL_BASE \ | 204 | (const struct elf_phdr *) (VDSO_HIGH_BASE \ |
178 | + VSYSCALL_EHDR->e_phoff); \ | 205 | + VDSO_HIGH_EHDR->e_phoff); \ |
179 | int i; \ | 206 | int i; \ |
180 | for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ | 207 | for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \ |
181 | if (vsyscall_phdrs[i].p_type == PT_LOAD) \ | 208 | if (vsyscall_phdrs[i].p_type == PT_LOAD) \ |
182 | DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ | 209 | DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ |
183 | PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ | 210 | PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \ |
diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index f7e068f4d2f9..a48cc3f7ccc6 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h | |||
@@ -51,7 +51,7 @@ | |||
51 | */ | 51 | */ |
52 | enum fixed_addresses { | 52 | enum fixed_addresses { |
53 | FIX_HOLE, | 53 | FIX_HOLE, |
54 | FIX_VSYSCALL, | 54 | FIX_VDSO, |
55 | #ifdef CONFIG_X86_LOCAL_APIC | 55 | #ifdef CONFIG_X86_LOCAL_APIC |
56 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ | 56 | FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ |
57 | #endif | 57 | #endif |
@@ -115,14 +115,6 @@ extern void __set_fixmap (enum fixed_addresses idx, | |||
115 | #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | 115 | #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) |
116 | #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | 116 | #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) |
117 | 117 | ||
118 | /* | ||
119 | * This is the range that is readable by user mode, and things | ||
120 | * acting like user mode such as get_user_pages. | ||
121 | */ | ||
122 | #define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL)) | ||
123 | #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) | ||
124 | |||
125 | |||
126 | extern void __this_fixmap_does_not_exist(void); | 118 | extern void __this_fixmap_does_not_exist(void); |
127 | 119 | ||
128 | /* | 120 | /* |
diff --git a/include/asm-i386/mmu.h b/include/asm-i386/mmu.h index f431a0b86d4c..8358dd3df7aa 100644 --- a/include/asm-i386/mmu.h +++ b/include/asm-i386/mmu.h | |||
@@ -12,6 +12,7 @@ typedef struct { | |||
12 | int size; | 12 | int size; |
13 | struct semaphore sem; | 13 | struct semaphore sem; |
14 | void *ldt; | 14 | void *ldt; |
15 | void *vdso; | ||
15 | } mm_context_t; | 16 | } mm_context_t; |
16 | 17 | ||
17 | #endif | 18 | #endif |
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index e3a552fa5538..f5bf544c729a 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h | |||
@@ -96,6 +96,8 @@ typedef struct { unsigned long pgprot; } pgprot_t; | |||
96 | 96 | ||
97 | #ifndef __ASSEMBLY__ | 97 | #ifndef __ASSEMBLY__ |
98 | 98 | ||
99 | struct vm_area_struct; | ||
100 | |||
99 | /* | 101 | /* |
100 | * This much address space is reserved for vmalloc() and iomap() | 102 | * This much address space is reserved for vmalloc() and iomap() |
101 | * as well as fixmap mappings. | 103 | * as well as fixmap mappings. |
@@ -139,6 +141,7 @@ extern int page_is_ram(unsigned long pagenr); | |||
139 | #include <asm-generic/memory_model.h> | 141 | #include <asm-generic/memory_model.h> |
140 | #include <asm-generic/page.h> | 142 | #include <asm-generic/page.h> |
141 | 143 | ||
144 | #define __HAVE_ARCH_GATE_AREA 1 | ||
142 | #endif /* __KERNEL__ */ | 145 | #endif /* __KERNEL__ */ |
143 | 146 | ||
144 | #endif /* _I386_PAGE_H */ | 147 | #endif /* _I386_PAGE_H */ |
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index ff1e2b1a7c84..2833fa2c0dd0 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h | |||
@@ -37,6 +37,7 @@ struct thread_info { | |||
37 | 0-0xBFFFFFFF for user-thead | 37 | 0-0xBFFFFFFF for user-thead |
38 | 0-0xFFFFFFFF for kernel-thread | 38 | 0-0xFFFFFFFF for kernel-thread |
39 | */ | 39 | */ |
40 | void *sysenter_return; | ||
40 | struct restart_block restart_block; | 41 | struct restart_block restart_block; |
41 | 42 | ||
42 | unsigned long previous_esp; /* ESP of the previous stack in case | 43 | unsigned long previous_esp; /* ESP of the previous stack in case |
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index d480f2e38215..69f0f1df6722 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h | |||
@@ -78,8 +78,8 @@ static inline int arch_unw_user_mode(const struct unwind_frame_info *info) | |||
78 | return user_mode_vm(&info->regs); | 78 | return user_mode_vm(&info->regs); |
79 | #else | 79 | #else |
80 | return info->regs.eip < PAGE_OFFSET | 80 | return info->regs.eip < PAGE_OFFSET |
81 | || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL) | 81 | || (info->regs.eip >= __fix_to_virt(FIX_VDSO) |
82 | && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE) | 82 | && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) |
83 | || info->regs.esp < PAGE_OFFSET; | 83 | || info->regs.esp < PAGE_OFFSET; |
84 | #endif | 84 | #endif |
85 | } | 85 | } |
diff --git a/include/linux/mm.h b/include/linux/mm.h index a929ea197e48..ff1fa87df8d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1065,5 +1065,7 @@ void drop_slab(void); | |||
1065 | extern int randomize_va_space; | 1065 | extern int randomize_va_space; |
1066 | #endif | 1066 | #endif |
1067 | 1067 | ||
1068 | const char *arch_vma_name(struct vm_area_struct *vma); | ||
1069 | |||
1068 | #endif /* __KERNEL__ */ | 1070 | #endif /* __KERNEL__ */ |
1069 | #endif /* _LINUX_MM_H */ | 1071 | #endif /* _LINUX_MM_H */ |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 349ef908a222..bee12a7a0576 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -189,6 +189,7 @@ enum | |||
189 | VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ | 189 | VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ |
190 | VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ | 190 | VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ |
191 | VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ | 191 | VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ |
192 | VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ | ||
192 | }; | 193 | }; |
193 | 194 | ||
194 | 195 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f1a4eb1a655e..f54afed8426f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -928,6 +928,18 @@ static ctl_table vm_table[] = { | |||
928 | .strategy = &sysctl_jiffies, | 928 | .strategy = &sysctl_jiffies, |
929 | }, | 929 | }, |
930 | #endif | 930 | #endif |
931 | #ifdef CONFIG_X86_32 | ||
932 | { | ||
933 | .ctl_name = VM_VDSO_ENABLED, | ||
934 | .procname = "vdso_enabled", | ||
935 | .data = &vdso_enabled, | ||
936 | .maxlen = sizeof(vdso_enabled), | ||
937 | .mode = 0644, | ||
938 | .proc_handler = &proc_dointvec, | ||
939 | .strategy = &sysctl_intvec, | ||
940 | .extra1 = &zero, | ||
941 | }, | ||
942 | #endif | ||
931 | { .ctl_name = 0 } | 943 | { .ctl_name = 0 } |
932 | }; | 944 | }; |
933 | 945 | ||