diff options
author | Andi Kleen <ak@muc.de> | 2005-04-16 18:24:55 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:24:55 -0400 |
commit | 1e01441051dda3bb01c455b6e20bce6d00563d82 (patch) | |
tree | 5dc4c69dd4522ca569f70ead0ecbb923f1451891 /arch/x86_64 | |
parent | 35faa71484287fc150b8498cd5acae59ad17a356 (diff) |
[PATCH] x86_64: Use a VMA for the 32bit vsyscall
Use a real VMA to map the 32bit vsyscall page.
This interacts better with Hugh's upcoming VMA walk optimization.
Also removes some ugly special cases.
Code roughly modelled after the ppc64 vdso version from Ben Herrenschmidt.
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/syscall32.c | 92 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 12 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 26 |
4 files changed, 61 insertions, 73 deletions
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 93d568dfa762..99b522052d16 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c | |||
@@ -312,6 +312,10 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); | |||
312 | 312 | ||
313 | static void elf32_init(struct pt_regs *); | 313 | static void elf32_init(struct pt_regs *); |
314 | 314 | ||
315 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | ||
316 | #define arch_setup_additional_pages syscall32_setup_pages | ||
317 | extern int syscall32_setup_pages(struct linux_binprm *, int exstack); | ||
318 | |||
315 | #include "../../../fs/binfmt_elf.c" | 319 | #include "../../../fs/binfmt_elf.c" |
316 | 320 | ||
317 | static void elf32_init(struct pt_regs *regs) | 321 | static void elf32_init(struct pt_regs *regs) |
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 399ff4985099..01d8db1a1c09 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/gfp.h> | 9 | #include <linux/gfp.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/stringify.h> | 11 | #include <linux/stringify.h> |
12 | #include <linux/security.h> | ||
12 | #include <asm/proto.h> | 13 | #include <asm/proto.h> |
13 | #include <asm/tlbflush.h> | 14 | #include <asm/tlbflush.h> |
14 | #include <asm/ia32_unistd.h> | 15 | #include <asm/ia32_unistd.h> |
@@ -30,51 +31,57 @@ extern int sysctl_vsyscall32; | |||
30 | char *syscall32_page; | 31 | char *syscall32_page; |
31 | static int use_sysenter = -1; | 32 | static int use_sysenter = -1; |
32 | 33 | ||
33 | /* | 34 | static struct page * |
34 | * Map the 32bit vsyscall page on demand. | 35 | syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) |
35 | * | 36 | { |
36 | * RED-PEN: This knows too much about high level VM. | 37 | struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); |
37 | * | 38 | get_page(p); |
38 | * Alternative would be to generate a vma with appropriate backing options | 39 | return p; |
39 | * and let it be handled by generic VM. | ||
40 | */ | ||
41 | int __map_syscall32(struct mm_struct *mm, unsigned long address) | ||
42 | { | ||
43 | pgd_t *pgd; | ||
44 | pud_t *pud; | ||
45 | pte_t *pte; | ||
46 | pmd_t *pmd; | ||
47 | int err = -ENOMEM; | ||
48 | |||
49 | spin_lock(&mm->page_table_lock); | ||
50 | pgd = pgd_offset(mm, address); | ||
51 | pud = pud_alloc(mm, pgd, address); | ||
52 | if (pud) { | ||
53 | pmd = pmd_alloc(mm, pud, address); | ||
54 | if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { | ||
55 | if (pte_none(*pte)) { | ||
56 | set_pte(pte, | ||
57 | mk_pte(virt_to_page(syscall32_page), | ||
58 | PAGE_KERNEL_VSYSCALL32)); | ||
59 | } | ||
60 | /* Flush only the local CPU. Other CPUs taking a fault | ||
61 | will just end up here again | ||
62 | This probably not needed and just paranoia. */ | ||
63 | __flush_tlb_one(address); | ||
64 | err = 0; | ||
65 | } | ||
66 | } | ||
67 | spin_unlock(&mm->page_table_lock); | ||
68 | return err; | ||
69 | } | 40 | } |
70 | 41 | ||
71 | int map_syscall32(struct mm_struct *mm, unsigned long address) | 42 | /* Prevent VMA merging */ |
43 | static void syscall32_vma_close(struct vm_area_struct *vma) | ||
72 | { | 44 | { |
73 | int err; | 45 | } |
74 | down_read(&mm->mmap_sem); | 46 | |
75 | err = __map_syscall32(mm, address); | 47 | static struct vm_operations_struct syscall32_vm_ops = { |
76 | up_read(&mm->mmap_sem); | 48 | .close = syscall32_vma_close, |
77 | return err; | 49 | .nopage = syscall32_nopage, |
50 | }; | ||
51 | |||
52 | struct linux_binprm; | ||
53 | |||
54 | /* Setup a VMA at program startup for the vsyscall page */ | ||
55 | int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) | ||
56 | { | ||
57 | int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; | ||
58 | struct vm_area_struct *vma; | ||
59 | struct mm_struct *mm = current->mm; | ||
60 | |||
61 | vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); | ||
62 | if (!vma) | ||
63 | return -ENOMEM; | ||
64 | if (security_vm_enough_memory(npages)) { | ||
65 | kmem_cache_free(vm_area_cachep, vma); | ||
66 | return -ENOMEM; | ||
67 | } | ||
68 | |||
69 | memset(vma, 0, sizeof(struct vm_area_struct)); | ||
70 | /* Could randomize here */ | ||
71 | vma->vm_start = VSYSCALL32_BASE; | ||
72 | vma->vm_end = VSYSCALL32_END; | ||
73 | /* MAYWRITE to allow gdb to COW and set breakpoints */ | ||
74 | vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYEXEC|VM_MAYWRITE; | ||
75 | vma->vm_flags |= mm->def_flags; | ||
76 | vma->vm_page_prot = protection_map[vma->vm_flags & 7]; | ||
77 | vma->vm_ops = &syscall32_vm_ops; | ||
78 | vma->vm_mm = mm; | ||
79 | |||
80 | down_write(&mm->mmap_sem); | ||
81 | insert_vm_struct(mm, vma); | ||
82 | mm->total_vm += npages; | ||
83 | up_write(&mm->mmap_sem); | ||
84 | return 0; | ||
78 | } | 85 | } |
79 | 86 | ||
80 | static int __init init_syscall32(void) | 87 | static int __init init_syscall32(void) |
@@ -82,7 +89,6 @@ static int __init init_syscall32(void) | |||
82 | syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); | 89 | syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); |
83 | if (!syscall32_page) | 90 | if (!syscall32_page) |
84 | panic("Cannot allocate syscall32 page"); | 91 | panic("Cannot allocate syscall32 page"); |
85 | SetPageReserved(virt_to_page(syscall32_page)); | ||
86 | if (use_sysenter > 0) { | 92 | if (use_sysenter > 0) { |
87 | memcpy(syscall32_page, syscall32_sysenter, | 93 | memcpy(syscall32_page, syscall32_sysenter, |
88 | syscall32_sysenter_end - syscall32_sysenter); | 94 | syscall32_sysenter_end - syscall32_sysenter); |
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 5724370475cc..d4676ca09144 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -458,17 +458,6 @@ bad_area: | |||
458 | up_read(&mm->mmap_sem); | 458 | up_read(&mm->mmap_sem); |
459 | 459 | ||
460 | bad_area_nosemaphore: | 460 | bad_area_nosemaphore: |
461 | |||
462 | #ifdef CONFIG_IA32_EMULATION | ||
463 | /* 32bit vsyscall. map on demand. */ | ||
464 | if (test_thread_flag(TIF_IA32) && | ||
465 | address >= VSYSCALL32_BASE && address < VSYSCALL32_END) { | ||
466 | if (map_syscall32(mm, address) < 0) | ||
467 | goto out_of_memory2; | ||
468 | return; | ||
469 | } | ||
470 | #endif | ||
471 | |||
472 | /* User mode accesses just cause a SIGSEGV */ | 461 | /* User mode accesses just cause a SIGSEGV */ |
473 | if (error_code & 4) { | 462 | if (error_code & 4) { |
474 | if (is_prefetch(regs, address, error_code)) | 463 | if (is_prefetch(regs, address, error_code)) |
@@ -550,7 +539,6 @@ no_context: | |||
550 | */ | 539 | */ |
551 | out_of_memory: | 540 | out_of_memory: |
552 | up_read(&mm->mmap_sem); | 541 | up_read(&mm->mmap_sem); |
553 | out_of_memory2: | ||
554 | if (current->pid == 1) { | 542 | if (current->pid == 1) { |
555 | yield(); | 543 | yield(); |
556 | goto again; | 544 | goto again; |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b0d604551d86..dbe53b4c7e66 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -583,9 +583,9 @@ static __init int x8664_sysctl_init(void) | |||
583 | __initcall(x8664_sysctl_init); | 583 | __initcall(x8664_sysctl_init); |
584 | #endif | 584 | #endif |
585 | 585 | ||
586 | /* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two | 586 | /* A pseudo VMAs to allow ptrace access for the vsyscall page. This only |
587 | different ones: one for 32bit and one for 64bit. Use the appropiate | 587 | covers the 64bit vsyscall page now. 32bit has a real VMA now and does |
588 | for the target task. */ | 588 | not need special handling anymore. */ |
589 | 589 | ||
590 | static struct vm_area_struct gate_vma = { | 590 | static struct vm_area_struct gate_vma = { |
591 | .vm_start = VSYSCALL_START, | 591 | .vm_start = VSYSCALL_START, |
@@ -593,22 +593,11 @@ static struct vm_area_struct gate_vma = { | |||
593 | .vm_page_prot = PAGE_READONLY | 593 | .vm_page_prot = PAGE_READONLY |
594 | }; | 594 | }; |
595 | 595 | ||
596 | static struct vm_area_struct gate32_vma = { | ||
597 | .vm_start = VSYSCALL32_BASE, | ||
598 | .vm_end = VSYSCALL32_END, | ||
599 | .vm_page_prot = PAGE_READONLY | ||
600 | }; | ||
601 | |||
602 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 596 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
603 | { | 597 | { |
604 | #ifdef CONFIG_IA32_EMULATION | 598 | #ifdef CONFIG_IA32_EMULATION |
605 | if (test_tsk_thread_flag(tsk, TIF_IA32)) { | 599 | if (test_tsk_thread_flag(tsk, TIF_IA32)) |
606 | /* lookup code assumes the pages are present. set them up | 600 | return NULL; |
607 | now */ | ||
608 | if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0) | ||
609 | return NULL; | ||
610 | return &gate32_vma; | ||
611 | } | ||
612 | #endif | 601 | #endif |
613 | return &gate_vma; | 602 | return &gate_vma; |
614 | } | 603 | } |
@@ -616,6 +605,8 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | |||
616 | int in_gate_area(struct task_struct *task, unsigned long addr) | 605 | int in_gate_area(struct task_struct *task, unsigned long addr) |
617 | { | 606 | { |
618 | struct vm_area_struct *vma = get_gate_vma(task); | 607 | struct vm_area_struct *vma = get_gate_vma(task); |
608 | if (!vma) | ||
609 | return 0; | ||
619 | return (addr >= vma->vm_start) && (addr < vma->vm_end); | 610 | return (addr >= vma->vm_start) && (addr < vma->vm_end); |
620 | } | 611 | } |
621 | 612 | ||
@@ -625,6 +616,5 @@ int in_gate_area(struct task_struct *task, unsigned long addr) | |||
625 | */ | 616 | */ |
626 | int in_gate_area_no_task(unsigned long addr) | 617 | int in_gate_area_no_task(unsigned long addr) |
627 | { | 618 | { |
628 | return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) || | 619 | return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); |
629 | ((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END))); | ||
630 | } | 620 | } |