author     Hugh Dickins <hugh@veritas.com>          2005-10-29 21:16:36 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>    2005-10-30 00:40:42 -0400
commit     69b0475456ff7ef520e16f69d7a15c0d68b74e64 (patch)
tree       3e70d47f16437254eff3b3cca4aa275be1b5e275
parent     60ec5585496871345c1a8113d7b60ed9d9474866 (diff)
[PATCH] mm: arm ready for split ptlock
Prepare arm for the split page_table_lock: three issues.

Signal handling's preserve and restore of iwmmxt context currently involves reading and writing that context to and from user space, while holding page_table_lock to secure the user page(s) against kswapd. If we split the lock, then the structure might span two pages, secured by different locks. That technique would no longer work cleanly, so instead read into and write from a kernel stack buffer, copying that out and in without locking (the structure is 160 bytes in size, and here we're near the top of the kernel stack). Or would the overhead be noticeable?

arm_syscall's cmpxchg emulation now uses pte_offset_map_lock instead of pte_offset_map and the mm-wide page_table_lock; and strictly, it should now also take mmap_sem before descending to pmd, to guard against another thread munmapping and the page table being pulled out beneath this thread.

Updated two comments in fault-armv.c. adjust_pte is interesting, since its modification of a pte in one part of the mm depends on the lock held when calling update_mmu_cache for a pte in some other part of that mm. This can't be done with a split page_table_lock (and we've already taken the lowest lock in the hierarchy here): so we'll have to disable split on arm, unless CONFIG_CPU_CACHE_VIPT ensures adjust_pte is never used.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
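The locking pattern the second paragraph describes is worth spelling out. The sketch below is not part of the patch: it is a hypothetical helper (example_set_word, with invented names throughout) showing the sequence the traps.c hunk adopts, assuming a 2.6.14-era ARM mm where pmd_offset() takes the pgd directly. Hold mmap_sem for read so a concurrent munmap cannot free the page table, then let pte_offset_map_lock() take whichever lock covers that page table, split or mm-wide.

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>

/* Hypothetical example, not kernel code: write val at a user address. */
static int example_set_word(struct mm_struct *mm, unsigned long addr,
			    unsigned long val)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = -EFAULT;

	down_read(&mm->mmap_sem);	/* guard against munmap freeing the table */
	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;
	pmd = pmd_offset(pgd, addr);
	if (!pmd_present(*pmd))
		goto out;
	/* map the pte and take the lock covering its page table */
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (pte_present(*pte) && pte_write(*pte)) {
		*(unsigned long *)addr = val;	/* pte lock keeps the page mapped */
		ret = 0;
	}
	pte_unmap_unlock(pte, ptl);
out:
	up_read(&mm->mmap_sem);
	return ret;
}

With the mm-wide page_table_lock this collapses to the old spin_lock/spin_unlock pair; with split ptlock it takes only the lock for the one page table touched, which is the point of the conversion.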
-rw-r--r--  arch/arm/kernel/signal.c  |  96
-rw-r--r--  arch/arm/kernel/traps.c   |  14
-rw-r--r--  arch/arm/mm/fault-armv.c  |   7
3 files changed, 33 insertions, 84 deletions
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a94d75fef598..a917e3dd3666 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -139,93 +139,33 @@ struct iwmmxt_sigframe {
 	unsigned long	storage[0x98/4];
 };
 
-static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
-{
-	unsigned long addr = (unsigned long)uptr;
-	pgd_t *pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pmd_t *pmd = pmd_offset(pgd, addr);
-		if (pmd_present(*pmd)) {
-			pte_t *pte = pte_offset_map(pmd, addr);
-			return (pte_present(*pte) && (!wr || pte_write(*pte)));
-		}
-	}
-	return 0;
-}
-
-static int copy_locked(void __user *uptr, void *kptr, size_t size, int write,
-		       void (*copyfn)(void *, void __user *))
-{
-	unsigned char v, __user *userptr = uptr;
-	int err = 0;
-
-	do {
-		struct mm_struct *mm;
-
-		if (write) {
-			__put_user_error(0, userptr, err);
-			__put_user_error(0, userptr + size - 1, err);
-		} else {
-			__get_user_error(v, userptr, err);
-			__get_user_error(v, userptr + size - 1, err);
-		}
-
-		if (err)
-			break;
-
-		mm = current->mm;
-		spin_lock(&mm->page_table_lock);
-		if (page_present(mm, userptr, write) &&
-		    page_present(mm, userptr + size - 1, write)) {
-			copyfn(kptr, uptr);
-		} else
-			err = 1;
-		spin_unlock(&mm->page_table_lock);
-	} while (err);
-
-	return err;
-}
-
 static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
 	/* the iWMMXt context must be 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	__put_user_error(IWMMXT_MAGIC0, &frame->magic0, err);
-	__put_user_error(IWMMXT_MAGIC1, &frame->magic1, err);
-
-	/*
-	 * iwmmxt_task_copy() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	err |= copy_locked(&frame->storage, current_thread_info(),
-			   sizeof(frame->storage), 1, iwmmxt_task_copy);
-	return err;
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic0 = IWMMXT_MAGIC0;
+	kframe->magic1 = IWMMXT_MAGIC1;
+	iwmmxt_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
 }
 
 static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	unsigned long magic0, magic1;
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
-	/* the iWMMXt context is 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	/*
-	 * Validate iWMMXt context signature.
-	 * Also, iwmmxt_task_restore() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	__get_user_error(magic0, &frame->magic0, err);
-	__get_user_error(magic1, &frame->magic1, err);
-	if (!err && magic0 == IWMMXT_MAGIC0 && magic1 == IWMMXT_MAGIC1)
-		err = copy_locked(&frame->storage, current_thread_info(),
-				  sizeof(frame->storage), 0, iwmmxt_task_restore);
-	return err;
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic0 != IWMMXT_MAGIC0 ||
+	    kframe->magic1 != IWMMXT_MAGIC1)
+		return -1;
+	iwmmxt_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
 }
 
 #endif
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index baa09601a64e..66e5a0516f23 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -483,29 +483,33 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		unsigned long addr = regs->ARM_r2;
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+		spinlock_t *ptl;
 
 		regs->ARM_cpsr &= ~PSR_C_BIT;
-		spin_lock(&mm->page_table_lock);
+		down_read(&mm->mmap_sem);
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd))
 			goto bad_access;
 		pmd = pmd_offset(pgd, addr);
 		if (!pmd_present(*pmd))
 			goto bad_access;
-		pte = pte_offset_map(pmd, addr);
-		if (!pte_present(*pte) || !pte_write(*pte))
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte_present(*pte) || !pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
 			goto bad_access;
+		}
 		val = *(unsigned long *)addr;
 		val -= regs->ARM_r0;
 		if (val == 0) {
 			*(unsigned long *)addr = regs->ARM_r1;
 			regs->ARM_cpsr |= PSR_C_BIT;
 		}
-		spin_unlock(&mm->page_table_lock);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
 		return val;
 
 		bad_access:
-		spin_unlock(&mm->page_table_lock);
+		up_read(&mm->mmap_sem);
 		/* simulate a write access fault */
 		do_DataAbort(addr, 15 + (1 << 11), regs);
 		return -1;
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index be4ab3d73c91..7fc1b35a6746 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -26,6 +26,11 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
 /*
  * We take the easy way out of this problem - we make the
  * PTE uncacheable. However, we leave the write buffer on.
+ *
+ * Note that the pte lock held when calling update_mmu_cache must also
+ * guard the pte (somewhere else in the same mm) that we modify here.
+ * Therefore those configurations which might call adjust_pte (those
+ * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
  */
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
 {
@@ -127,7 +132,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page);
  * 2. If we have multiple shared mappings of the same space in
  *    an object, we need to deal with the cache aliasing issues.
  *
- * Note that the page_table_lock will be held.
+ * Note that the pte lock will be held.
  */
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {