path: root/arch/i386/kernel/sysenter.c
author     Jeremy Fitzhardinge <jeremy@goop.org>    2007-05-02 13:27:12 -0400
committer  Andi Kleen <andi@basil.nowhere.org>      2007-05-02 13:27:12 -0400
commit     1dbf527c51c6c20c19869c8125cb5b87c3d09506 (patch)
tree       ab480b3eff886e54c6d3c881936f046599e7ea61 /arch/i386/kernel/sysenter.c
parent     d4f7a2c18e59e0304a1c733589ce14fc02fec1bd (diff)
[PATCH] i386: Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime
rather than compile time, it is possible to enable/disable compat mode
at runtime.

This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl.  (Switching on a running system
shouldn't be done lightly; any process which was relying on the compat
VDSO will be upset if it goes away.)

The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.

From: Hugh Dickins <hugh@veritas.com>

Fix oops from i386-make-compat_vdso-runtime-selectable.patch.

Even mingetty at system startup finds it easy to trigger an oops while
reading /proc/PID/maps: though it has a good hold on the mm itself,
that cannot stop exit_mm() from resetting tsk->mm to NULL.

(It is usually show_map()'s call to get_gate_vma() which oopses, and I
expect we could change that to check priv->tail_vma instead; but no
matter, even m_start()'s call just after get_task_mm() is racy.)

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
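For context, a minimal userspace sketch of the runtime switch described
above. It is not part of the patch, and the /proc/sys/vm/vdso_enabled path
is an assumption about where i386 kernels of this era expose the
vdso_enabled knob; as noted above, flipping it on a running system can
upset processes already relying on the compat VDSO.

/*
 * Hypothetical sketch: select compat-VDSO mode (VDSO_COMPAT == 2) at
 * runtime via sysctl.  The /proc/sys/vm/vdso_enabled path is assumed,
 * not taken from this patch; 0/1/2 correspond to VDSO_DISABLED,
 * VDSO_ENABLED and VDSO_COMPAT in the diff below.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/vdso_enabled", "w");

	if (!f) {
		perror("/proc/sys/vm/vdso_enabled");
		return 1;
	}
	fputs("2\n", f);		/* 2 == VDSO_COMPAT */
	if (fclose(f) != 0) {		/* write errors surface on close */
		perror("fclose");
		return 1;
	}
	return 0;
}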
Diffstat (limited to 'arch/i386/kernel/sysenter.c')
-rw-r--r--	arch/i386/kernel/sysenter.c	145
1 file changed, 94 insertions(+), 51 deletions(-)
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index e5a958379ac9..0b9768ee1e8d 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -23,16 +23,25 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 #include <asm/elf.h>
+#include <asm/tlbflush.h>
+
+enum {
+	VDSO_DISABLED = 0,
+	VDSO_ENABLED = 1,
+	VDSO_COMPAT = 2,
+};
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT VDSO_COMPAT
+#else
+#define VDSO_DEFAULT VDSO_ENABLED
+#endif
 
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
-#ifdef CONFIG_PARAVIRT
-unsigned int __read_mostly vdso_enabled = 0;
-#else
-unsigned int __read_mostly vdso_enabled = 1;
-#endif
+unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
@@ -47,7 +56,6 @@ __setup("vdso=", vdso_setup);
 
 extern asmlinkage void sysenter_entry(void);
 
-#ifdef CONFIG_COMPAT_VDSO
 static __init void reloc_symtab(Elf32_Ehdr *ehdr,
 				unsigned offset, unsigned size)
 {
@@ -164,11 +172,6 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
 					  shdr[i].sh_size);
 	}
 }
-#else
-static inline void relocate_vdso(Elf32_Ehdr *ehdr)
-{
-}
-#endif	/* COMPAT_VDSO */
 
 void enable_sep_cpu(void)
 {
@@ -188,6 +191,25 @@ void enable_sep_cpu(void)
 	put_cpu();
 }
 
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
+	return 0;
+}
+
 /*
  * These symbols are defined by vsyscall.o to mark the bounds
  * of the ELF DSO images included therein.
@@ -196,6 +218,22 @@ extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 static struct page *syscall_pages[1];
 
+static void map_compat_vdso(int map)
+{
+	static int vdso_mapped;
+
+	if (map == vdso_mapped)
+		return;
+
+	vdso_mapped = map;
+
+	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+	/* flush stray tlbs */
+	flush_tlb_all();
+}
+
 int __init sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -204,10 +242,9 @@ int __init sysenter_setup(void)
 
 	syscall_pages[0] = virt_to_page(syscall_page);
 
-#ifdef CONFIG_COMPAT_VDSO
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+	gate_vma_init();
+
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		vsyscall = &vsyscall_int80_start;
@@ -226,42 +263,57 @@ int __init sysenter_setup(void)
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
-#ifdef __HAVE_ARCH_GATE_AREA
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
+	bool compat;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
 
-	/*
-	 * MAYWRITE to allow gdb to COW and set breakpoints
-	 *
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	ret = install_special_mapping(mm, addr, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-				      VM_ALWAYSDUMP,
-				      syscall_pages);
-	if (ret)
-		goto up_fail;
+	/* Test compat mode once here, in case someone
+	   changes it via sysctl */
+	compat = (vdso_enabled == VDSO_COMPAT);
+
+	map_compat_vdso(compat);
+
+	if (compat)
+		addr = VDSO_HIGH_BASE;
+	else {
+		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 *
+		 * Make sure the vDSO gets into every core dump.
+		 * Dumping its contents makes post-mortem fully
+		 * interpretable later without matching up the same
+		 * kernel and hardware config to see what PC values
+		 * meant.
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					      VM_ALWAYSDUMP,
+					      syscall_pages);
+
+		if (ret)
+			goto up_fail;
+	}
 
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 		(void *)VDSO_SYM(&SYSENTER_RETURN);
-up_fail:
+
+  up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
@@ -274,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 {
+	struct mm_struct *mm = tsk->mm;
+
+	/* Check to see if this task was created in compat vdso mode */
+	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
+		return &gate_vma;
 	return NULL;
 }
 
@@ -286,17 +343,3 @@ int in_gate_area_no_task(unsigned long addr)
 {
 	return 0;
 }
-#else	/* !__HAVE_ARCH_GATE_AREA */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
-{
-	/*
-	 * If not creating userspace VMA, simply set vdso to point to
-	 * fixmap page.
-	 */
-	current->mm->context.vdso = (void *)VDSO_HIGH_BASE;
-	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
-
-	return 0;
-}
-#endif	/* __HAVE_ARCH_GATE_AREA */
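As a closing illustration (not part of the patch), the sketch below checks
from userspace where the vDSO actually landed for the current process by
scanning /proc/PID/maps, which the commit message discusses. With vdso=2
the new code pins the mapping at VDSO_HIGH_BASE (the fixmap page), while
in normal mode it gets a per-process address from get_unmapped_area().
The "[vdso]" label is an assumption about what arch_vma_name() reports
for this mapping.

/*
 * Userspace sketch only -- not kernel code and not part of this patch.
 * It prints any /proc/self/maps lines that look like the vDSO mapping,
 * assuming the kernel labels it "[vdso]".
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *maps = fopen("/proc/self/maps", "r");

	if (!maps) {
		perror("/proc/self/maps");
		return 1;
	}
	while (fgets(line, sizeof(line), maps)) {
		if (strstr(line, "vdso"))	/* matches the "[vdso]" tag */
			fputs(line, stdout);
	}
	fclose(maps);
	return 0;
}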