author	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2009-02-10 11:02:37 -0500
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2009-02-11 00:00:10 -0500
commit	8d30c14cab30d405a05f2aaceda1e9ad57800f36 (patch)
tree	f3c0f11b3ce157601719119b2fe4b6a869828ae8 /arch/powerpc/mm/fault.c
parent	4b7ad3593634c593d0e891ea415f9cf1bbcfcbd2 (diff)
powerpc/mm: Rework I$/D$ coherency (v3)
This patch reworks the way we do I and D cache coherency on PowerPC. The "old" way was split in 3 different parts depending on the processor type:

- Hash with per-page exec support (64-bit and >= POWER4 only) does it at hashing time, by preventing exec on unclean pages and cleaning pages on exec faults.

- Everything without per-page exec support (32-bit hash, 8xx, and 64-bit < POWER4) does it for all pages going to user space in update_mmu_cache().

- Embedded with per-page exec support does it from do_page_fault() on exec faults, in a way similar to what the hash code does.

That leads to confusion, and bugs. For example, the method using update_mmu_cache() is racy on SMP, where another processor can see the new PTE and hash it in before we have cleaned the cache, and then blow up trying to execute. This is hard to hit but I think it has bitten us in the past. It is also inefficient for embedded, where we always end up having to take at least one more page fault.

This reworks the whole thing by moving the cache sync into two main call sites, though we keep different behaviours depending on the HW capability. The call sites are set_pte_at(), which is now made out of line, and ptep_set_access_flags(), which joins the former in pgtable.c.

The base idea for Embedded with per-page exec support is that we now do the flush at set_pte_at() time when coming from an exec fault, which allows us to avoid the double fault problem completely (we can improve the situation further by implementing TLB preload in update_mmu_cache(), but that's for later). If for some reason we didn't do it there and we try to execute, we'll hit the page fault, which will do a minor fault, which will hit ptep_set_access_flags() to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed; we now make that path also perform the I/D cache sync for exec faults. This second path is the catch-all for things that weren't cleaned at set_pte_at() time.

For CPUs without per-page exec support, we always do the sync at set_pte_at(), thus guaranteeing that when the PTE is visible to other processors, the cache is clean.

For the 64-bit hash with per-page exec support case, we keep the old mechanism for now. I'll look into changing it later, once I've reworked a bit how we use _PAGE_EXEC.

This is also a first step towards adding _PAGE_EXEC support for embedded platforms.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
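To make the mechanism described above concrete, here is a minimal sketch of the kind of helper that set_pte_at() and ptep_set_access_flags() can share: flush the D-cache and invalidate the I-cache for the page, record that it is clean via PG_arch_1, and only then let the PTE carry execute permission. The helper name is hypothetical and this is not the code added by the patch (that lives in arch/powerpc/mm/pgtable.c, which is not part of this diff); flush_dcache_icache_page(), PG_arch_1 and _PAGE_HWEXEC are existing powerpc kernel primitives.

/*
 * Illustrative sketch only (hypothetical helper name, not the code from
 * this patch): clean a page's caches before an executable PTE becomes
 * visible, as the description above says set_pte_at() and
 * ptep_set_access_flags() now do for exec faults.
 */
static pte_t sync_icache_dcache_sketch(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page;

	if (!pfn_valid(pfn))
		return pte;
	page = pfn_to_page(pfn);

	/*
	 * PG_arch_1 is powerpc's "caches are clean" marker, so the
	 * expensive D-cache flush / I-cache invalidate runs at most
	 * once per page.
	 */
	if (!test_bit(PG_arch_1, &page->flags)) {
		flush_dcache_icache_page(page);
		set_bit(PG_arch_1, &page->flags);
	}

	/* The page can now safely be mapped executable. */
	return __pte(pte_val(pte) | _PAGE_HWEXEC);
}

A caller such as set_pte_at() would run an executable user PTE through a helper like this before writing it, so that by the time another CPU can see the PTE the cache is already clean, which is what closes the SMP race described in the message above.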
Diffstat (limited to 'arch/powerpc/mm/fault.c')
-rw-r--r--	arch/powerpc/mm/fault.c	46
1 file changed, 17 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b8636b8a..76993941cac9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,45 +253,33 @@ good_area:
 #endif /* CONFIG_8xx */
 
 	if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-		/* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+		/* Protection fault on exec go straight to failure on
+		 * Hash based MMUs as they either don't support per-page
+		 * execute permission, or if they do, it's handled already
+		 * at the hash level. This test would probably have to
+		 * be removed if we change the way this works to make hash
+		 * processors use the same I/D cache coherency mechanism
+		 * as embedded.
+		 */
 		if (error_code & DSISR_PROTFAULT)
 			goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
 		/*
 		 * Allow execution from readable areas if the MMU does not
 		 * provide separate controls over reading and executing.
+		 *
+		 * Note: That code used to not be enabled for 4xx/BookE.
+		 * It is now as I/D cache coherency for these is done at
+		 * set_pte_at() time and I see no reason why the test
+		 * below wouldn't be valid on those processors. This -may-
+		 * break programs compiled with a really old ABI though.
 		 */
 		if (!(vma->vm_flags & VM_EXEC) &&
 		    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
 		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
 			goto bad_area;
-#else
-		pte_t *ptep;
-		pmd_t *pmdp;
-
-		/* Since 4xx/Book-E supports per-page execute permission,
-		 * we lazily flush dcache to icache. */
-		ptep = NULL;
-		if (get_pteptr(mm, address, &ptep, &pmdp)) {
-			spinlock_t *ptl = pte_lockptr(mm, pmdp);
-			spin_lock(ptl);
-			if (pte_present(*ptep)) {
-				struct page *page = pte_page(*ptep);
-
-				if (!test_bit(PG_arch_1, &page->flags)) {
-					flush_dcache_icache_page(page);
-					set_bit(PG_arch_1, &page->flags);
-				}
-				pte_update(ptep, 0, _PAGE_HWEXEC |
-					   _PAGE_ACCESSED);
-				local_flush_tlb_page(vma, address);
-				pte_unmap_unlock(ptep, ptl);
-				up_read(&mm->mmap_sem);
-				return 0;
-			}
-			pte_unmap_unlock(ptep, ptl);
-		}
-#endif
 	/* a write */
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))