aboutsummaryrefslogtreecommitdiffstats
path: root/arch/um
diff options
context:
space:
mode:
author Jeff Dike <jdike@addtoit.com> 2008-02-05 01:31:01 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-05 12:44:29 -0500
commit 3963333fe6767f15141ab2dc3b933721c636c212 (patch)
tree 62fbec62adf1796709dfa197e12dd725911e0fc9 /arch/um
parent 42a2b54ce8c7b9d4f418995a7950e7e2e15e52ce (diff)
uml: cover stubs with a VMA
Give the stubs a VMA. This allows the removal of a truly nasty kludge to make sure that mm->nr_ptes was correct in exit_mmap. The underlying problem was always that the stubs, which have ptes, and thus allocated a page table, weren't covered by a VMA. This patch fixes that by using install_special_mapping in arch_dup_mmap and activate_context to create the VMA. The stubs have to be moved, since shift_arg_pages seems to assume that the stack is the only VMA present at that point during exec, and uses vma_adjust to fiddle its VMA. However, that extends the stub VMA by the amount removed from the stack VMA. To avoid this problem, the stubs were moved to a different fixed location at the start of the address space. The init_stub_pte calls were moved from init_new_context to arch_dup_mmap because I was occasionally seeing arch_dup_mmap not being called, causing exit_mmap to die. Rather than figure out what was really happening, I decided it was cleaner to just move the calls so that there's no doubt that both the pte and VMA creation happen, no matter what. arch_exit_mmap is used to clear the stub ptes at exit time. The STUB_* constants in as-layout.h no longer depend on UM_TASK_SIZE, so that definition is removed, along with the comments complaining about gcc. Because the stubs are no longer at the top of the address space, some care is needed while flushing TLBs. update_pte_range checks for addresses in the stub range and skips them. flush_thread now issues two unmaps, one for the range before STUB_START and one for the range after STUB_END. Signed-off-by: Jeff Dike <jdike@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/um')
-rw-r--r-- arch/um/include/as-layout.h 19
-rw-r--r-- arch/um/include/common-offsets.h 3
-rw-r--r-- arch/um/kernel/exec.c 5
-rw-r--r-- arch/um/kernel/skas/mmu.c 104
-rw-r--r-- arch/um/kernel/tlb.c 11
5 files changed, 70 insertions, 72 deletions
diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h
index a2008f550fee..606bb5c7fdf6 100644
--- a/arch/um/include/as-layout.h
+++ b/arch/um/include/as-layout.h
@@ -29,21 +29,10 @@
29#define _AC(X, Y) __AC(X, Y) 29#define _AC(X, Y) __AC(X, Y)
30#endif 30#endif
31 31
32/* 32#define STUB_START _AC(, 0x100000)
33 * The "- 1"'s are to avoid gcc complaining about integer overflows 33#define STUB_CODE _AC((unsigned long), STUB_START)
34 * and unrepresentable decimal constants. With 3-level page tables, 34#define STUB_DATA _AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE)
35 * TASK_SIZE is 0x80000000, which gets turned into its signed decimal 35#define STUB_END _AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE)
36 * equivalent in asm-offsets.s. gcc then complains about that being
37 * unsigned only in C90. To avoid that, UM_TASK_SIZE is defined as
38 * TASK_SIZE - 1. To compensate, we need to add the 1 back here.
39 * However, adding it back to UM_TASK_SIZE produces more gcc
40 * complaints. So, I adjust the thing being subtracted from
41 * UM_TASK_SIZE instead. Bah.
42 */
43#define STUB_CODE _AC((unsigned long), \
44 UM_TASK_SIZE - (2 * UM_KERN_PAGE_SIZE - 1))
45#define STUB_DATA _AC((unsigned long), UM_TASK_SIZE - (UM_KERN_PAGE_SIZE - 1))
46#define STUB_START _AC(, STUB_CODE)
47 36
48#ifndef __ASSEMBLY__ 37#ifndef __ASSEMBLY__
49 38
diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h
index 5b67d7ced2a7..b54bd35585c2 100644
--- a/arch/um/include/common-offsets.h
+++ b/arch/um/include/common-offsets.h
@@ -39,6 +39,3 @@ DEFINE(UM_HZ, HZ);
39DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC); 39DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC);
40DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); 40DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
41DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); 41DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
42
43/* See as-layout.h for an explanation of the "- 1". Bah. */
44DEFINE(UM_TASK_SIZE, TASK_SIZE - 1);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index bf66b5b7bc68..76a62c0cb2bc 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -19,12 +19,13 @@
19void flush_thread(void) 19void flush_thread(void)
20{ 20{
21 void *data = NULL; 21 void *data = NULL;
22 unsigned long end = proc_mm ? TASK_SIZE : STUB_START;
23 int ret; 22 int ret;
24 23
25 arch_flush_thread(&current->thread.arch); 24 arch_flush_thread(&current->thread.arch);
26 25
27 ret = unmap(&current->mm->context.id, 0, end, 1, &data); 26 ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
27 ret = ret || unmap(&current->mm->context.id, STUB_END,
28 TASK_SIZE - STUB_END, 1, &data);
28 if (ret) { 29 if (ret) {
29 printk(KERN_ERR "flush_thread - clearing address space failed, " 30 printk(KERN_ERR "flush_thread - clearing address space failed, "
30 "err = %d\n", ret); 31 "err = %d\n", ret);
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 6da9ab4f5a18..e8dc8540d444 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -34,25 +34,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
34 if (!pte) 34 if (!pte)
35 goto out_pte; 35 goto out_pte;
36 36
37 /*
38 * There's an interaction between the skas0 stub pages, stack
39 * randomization, and the BUG at the end of exit_mmap. exit_mmap
40 * checks that the number of page tables freed is the same as had
41 * been allocated. If the stack is on the last page table page,
42 * then the stack pte page will be freed, and if not, it won't. To
43 * avoid having to know where the stack is, or if the process mapped
44 * something at the top of its address space for some other reason,
45 * we set TASK_SIZE to end at the start of the last page table.
46 * This keeps exit_mmap off the last page, but introduces a leak
47 * of that page. So, we hang onto it here and free it in
48 * destroy_context_skas.
49 */
50
51 mm->context.last_page_table = pmd_page_vaddr(*pmd);
52#ifdef CONFIG_3_LEVEL_PGTABLES
53 mm->context.last_pmd = (unsigned long) __va(pud_val(*pud));
54#endif
55
56 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); 37 *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
57 *pte = pte_mkread(*pte); 38 *pte = pte_mkread(*pte);
58 return 0; 39 return 0;
@@ -76,24 +57,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
76 stack = get_zeroed_page(GFP_KERNEL); 57 stack = get_zeroed_page(GFP_KERNEL);
77 if (stack == 0) 58 if (stack == 0)
78 goto out; 59 goto out;
79
80 /*
81 * This zeros the entry that pgd_alloc didn't, needed since
82 * we are about to reinitialize it, and want mm.nr_ptes to
83 * be accurate.
84 */
85 mm->pgd[USER_PTRS_PER_PGD] = __pgd(0);
86
87 ret = init_stub_pte(mm, STUB_CODE,
88 (unsigned long) &__syscall_stub_start);
89 if (ret)
90 goto out_free;
91
92 ret = init_stub_pte(mm, STUB_DATA, stack);
93 if (ret)
94 goto out_free;
95
96 mm->nr_ptes--;
97 } 60 }
98 61
99 to_mm->id.stack = stack; 62 to_mm->id.stack = stack;
@@ -137,6 +100,64 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
137 return ret; 100 return ret;
138} 101}
139 102
103void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
104{
105 struct page **pages;
106 int err, ret;
107
108 if (!skas_needs_stub)
109 return;
110
111 ret = init_stub_pte(mm, STUB_CODE,
112 (unsigned long) &__syscall_stub_start);
113 if (ret)
114 goto out;
115
116 ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
117 if (ret)
118 goto out;
119
120 pages = kmalloc(2 * sizeof(struct page *), GFP_KERNEL);
121 if (pages == NULL) {
122 printk(KERN_ERR "arch_dup_mmap failed to allocate 2 page "
123 "pointers\n");
124 goto out;
125 }
126
127 pages[0] = virt_to_page(&__syscall_stub_start);
128 pages[1] = virt_to_page(mm->context.id.stack);
129
130 /* dup_mmap already holds mmap_sem */
131 err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
132 VM_READ | VM_MAYREAD | VM_EXEC |
133 VM_MAYEXEC | VM_DONTCOPY, pages);
134 if (err) {
135 printk(KERN_ERR "install_special_mapping returned %d\n", err);
136 goto out_free;
137 }
138 return;
139
140out_free:
141 kfree(pages);
142out:
143 force_sigsegv(SIGSEGV, current);
144}
145
146void arch_exit_mmap(struct mm_struct *mm)
147{
148 pte_t *pte;
149
150 pte = virt_to_pte(mm, STUB_CODE);
151 if (pte != NULL)
152 pte_clear(mm, STUB_CODE, pte);
153
154 pte = virt_to_pte(mm, STUB_DATA);
155 if (pte == NULL)
156 return;
157
158 pte_clear(mm, STUB_DATA, pte);
159}
160
140void destroy_context(struct mm_struct *mm) 161void destroy_context(struct mm_struct *mm)
141{ 162{
142 struct mm_context *mmu = &mm->context; 163 struct mm_context *mmu = &mm->context;
@@ -146,15 +167,8 @@ void destroy_context(struct mm_struct *mm)
146 else 167 else
147 os_kill_ptraced_process(mmu->id.u.pid, 1); 168 os_kill_ptraced_process(mmu->id.u.pid, 1);
148 169
149 if (!proc_mm || !ptrace_faultinfo) { 170 if (skas_needs_stub)
150 free_page(mmu->id.stack); 171 free_page(mmu->id.stack);
151 pte_lock_deinit(virt_to_page(mmu->last_page_table));
152 pte_free_kernel(mm, (pte_t *) mmu->last_page_table);
153 dec_zone_page_state(virt_to_page(mmu->last_page_table), NR_PAGETABLE);
154#ifdef CONFIG_3_LEVEL_PGTABLES
155 pmd_free(mm, (pmd_t *) mmu->last_pmd);
156#endif
157 }
158 172
159 free_ldt(mmu); 173 free_ldt(mmu);
160} 174}
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 429fed2f66b2..ef5a2a20d351 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -184,6 +184,9 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
184 184
185 pte = pte_offset_kernel(pmd, addr); 185 pte = pte_offset_kernel(pmd, addr);
186 do { 186 do {
187 if ((addr >= STUB_START) && (addr < STUB_END))
188 continue;
189
187 r = pte_read(*pte); 190 r = pte_read(*pte);
188 w = pte_write(*pte); 191 w = pte_write(*pte);
189 x = pte_exec(*pte); 192 x = pte_exec(*pte);
@@ -486,9 +489,6 @@ void __flush_tlb_one(unsigned long addr)
486static void fix_range(struct mm_struct *mm, unsigned long start_addr, 489static void fix_range(struct mm_struct *mm, unsigned long start_addr,
487 unsigned long end_addr, int force) 490 unsigned long end_addr, int force)
488{ 491{
489 if (!proc_mm && (end_addr > STUB_START))
490 end_addr = STUB_START;
491
492 fix_range_common(mm, start_addr, end_addr, force); 492 fix_range_common(mm, start_addr, end_addr, force);
493} 493}
494 494
@@ -502,8 +502,6 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
502 502
503void flush_tlb_mm(struct mm_struct *mm) 503void flush_tlb_mm(struct mm_struct *mm)
504{ 504{
505 unsigned long end;
506
507 /* 505 /*
508 * Don't bother flushing if this address space is about to be 506 * Don't bother flushing if this address space is about to be
509 * destroyed. 507 * destroyed.
@@ -511,8 +509,7 @@ void flush_tlb_mm(struct mm_struct *mm)
511 if (atomic_read(&mm->mm_users) == 0) 509 if (atomic_read(&mm->mm_users) == 0)
512 return; 510 return;
513 511
514 end = proc_mm ? TASK_SIZE : STUB_START; 512 fix_range(mm, 0, TASK_SIZE, 0);
515 fix_range(mm, 0, end, 0);
516} 513}
517 514
518void force_flush_all(void) 515void force_flush_all(void)