author      Benjamin Herrenschmidt <benh@kernel.crashing.org>   2009-02-10 11:02:37 -0500
committer   Benjamin Herrenschmidt <benh@kernel.crashing.org>   2009-02-11 00:00:10 -0500
commit      8d30c14cab30d405a05f2aaceda1e9ad57800f36 (patch)
tree        f3c0f11b3ce157601719119b2fe4b6a869828ae8
parent      4b7ad3593634c593d0e891ea415f9cf1bbcfcbd2 (diff)
powerpc/mm: Rework I$/D$ coherency (v3)
This patch reworks the way we do I and D cache coherency on PowerPC. The "old" way was split in 3 different parts depending on the processor type:

 - Hash with per-page exec support (64-bit and >= POWER4 only) does it at hashing time, by preventing exec on unclean pages and cleaning pages on exec faults.

 - Everything without per-page exec support (32-bit hash, 8xx, and 64-bit < POWER4) does it for all pages going to user space in update_mmu_cache().

 - Embedded with per-page exec support does it from do_page_fault() on exec faults, in a way similar to what the hash code does.

That leads to confusion and bugs. For example, the method using update_mmu_cache() is racy on SMP: another processor can see the new PTE and hash it in before we have cleaned the cache, and then blow up trying to execute. This is hard to hit but I think it has bitten us in the past. It is also inefficient for embedded, where we always end up taking at least one more page fault.

This reworks the whole thing by moving the cache sync into two main call sites, though we keep different behaviours depending on the HW capability. The call sites are set_pte_at(), which is now made out of line, and ptep_set_access_flags(), which joins the former in pgtable.c.

The base idea for Embedded with per-page exec support is that we now do the flush at set_pte_at() time when coming from an exec fault, which allows us to avoid the double-fault problem completely (we can improve the situation further by implementing TLB preload in update_mmu_cache(), but that's for later). If for some reason we didn't do it there and we try to execute, we'll hit the page fault, which will do a minor fault, which will hit ptep_set_access_flags() to do things like update _PAGE_ACCESSED or _PAGE_DIRTY if needed; we just make that path also perform the I/D cache sync for exec faults now. This second path is the catch-all for things that weren't cleaned at set_pte_at() time.

For CPUs without per-page exec support, we always do the sync at set_pte_at(), thus guaranteeing that when the PTE is visible to other processors, the cache is clean.

For the 64-bit hash with per-page exec support case, we keep the old mechanism for now. I'll look into changing it later, once I've reworked a bit how we use _PAGE_EXEC.

This is also a first step toward adding _PAGE_EXEC support for embedded platforms.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
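[Editor's note: the flush policy the patch introduces in arch/powerpc/mm/pgtable.c (pte_need_exec_flush() and friends, shown in the diff below) can be illustrated with a minimal, self-contained userspace model. This is a sketch only: the flag values and function names here are hypothetical, not the kernel's, and it simply mirrors when the new set_pte_at()/ptep_set_access_flags() paths decide a D$/I$ flush is needed.]

/*
 * Userspace sketch of the flush policy (illustrative only; the real
 * logic lives in arch/powerpc/mm/pgtable.c).
 */
#include <stdbool.h>
#include <stdio.h>

#define PTE_PRESENT  0x001      /* models _PAGE_PRESENT */
#define PTE_SPECIAL  0x002      /* models _PAGE_SPECIAL */
#define PTE_NO_CACHE 0x004      /* models _PAGE_NO_CACHE */
#define PTE_HWEXEC   0x008      /* models _PAGE_HWEXEC */

/* Only "normal" PTEs are candidates: present, not special, not uncached */
static bool pte_looks_normal(unsigned long pte)
{
        return (pte & (PTE_PRESENT | PTE_SPECIAL | PTE_NO_CACHE)) == PTE_PRESENT;
}

/* Embedded MMU with per-page exec: flush lazily, only on an exec fault
 * for a page that hasn't been marked executable in HW yet. */
static bool flush_embedded_exec(unsigned long pte, bool exec_fault)
{
        return pte_looks_normal(pte) && exec_fault && !(pte & PTE_HWEXEC);
}

/* No per-page exec support: flush every normal PTE when it is set, so the
 * cache is clean before any other CPU can see the PTE. */
static bool flush_no_per_page_exec(unsigned long pte, bool at_set_pte)
{
        return at_set_pte && pte_looks_normal(pte);
}

int main(void)
{
        unsigned long pte = PTE_PRESENT;

        printf("embedded, exec fault, no HWEXEC yet -> flush: %d\n",
               flush_embedded_exec(pte, true));         /* 1: flush now */
        printf("embedded, data fault               -> flush: %d\n",
               flush_embedded_exec(pte, false));        /* 0: deferred */
        printf("no per-page exec, set_pte_at()     -> flush: %d\n",
               flush_no_per_page_exec(pte, true));      /* 1: flush now */
        return 0;
}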
-rw-r--r--  arch/powerpc/include/asm/highmem.h         2
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc32.h   56
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64.h   29
-rw-r--r--  arch/powerpc/include/asm/pgtable.h         84
-rw-r--r--  arch/powerpc/mm/fault.c                    46
-rw-r--r--  arch/powerpc/mm/mem.c                      33
-rw-r--r--  arch/powerpc/mm/pgtable.c                  131
7 files changed, 245 insertions, 136 deletions
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index 04e4a620952..a286e47100b 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -99,7 +99,7 @@ static inline void *kmap_atomic_prot(struct page *page, enum km_type type, pgpro
 #ifdef CONFIG_DEBUG_HIGHMEM
         BUG_ON(!pte_none(*(kmap_pte-idx)));
 #endif
-        __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
+        __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
         local_flush_tlb_page(NULL, vaddr);
 
         return (void*) vaddr;
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index f69a4d97772..211c90df476 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -429,6 +429,8 @@ extern int icache_44x_need_flush;
 #define PMD_PAGE_SIZE(pmd)      bad_call_to_PMD_PAGE_SIZE()
 #endif
 
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
 #define _PAGE_CHG_MASK  (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 
@@ -667,44 +669,6 @@ static inline unsigned long long pte_update(pte_t *p,
 #endif /* CONFIG_PTE_64BIT */
 
 /*
- * set_pte stores a linux PTE into the linux page table.
- * On machines which use an MMU hash table we avoid changing the
- * _PAGE_HASHPTE bit.
- */
-
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-                                pte_t *ptep, pte_t pte)
-{
-#if (_PAGE_HASHPTE != 0) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-        pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte) & ~_PAGE_HASHPTE);
-#elif defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP)
-#if _PAGE_HASHPTE != 0
-        if (pte_val(*ptep) & _PAGE_HASHPTE)
-                flush_hash_entry(mm, ptep, addr);
-#endif
-        __asm__ __volatile__("\
-                stw%U0%X0 %2,%0\n\
-                eieio\n\
-                stw%U0%X0 %L2,%1"
-        : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-        : "r" (pte) : "memory");
-#else
-        *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-                      | (pte_val(pte) & ~_PAGE_HASHPTE));
-#endif
-}
-
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-                              pte_t *ptep, pte_t pte)
-{
-#if defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) && defined(CONFIG_DEBUG_VM)
-        WARN_ON(pte_present(*ptep));
-#endif
-        __set_pte_at(mm, addr, ptep, pte);
-}
-
-/*
  * 2.6 calls this without flushing the TLB entry; this is wrong
  * for our hash-based implementation, we fix that up here.
  */
@@ -744,24 +708,14 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
         unsigned long bits = pte_val(entry) &
-                (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
+                (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW |
+                 _PAGE_HWEXEC | _PAGE_EXEC);
         pte_update(ptep, 0, bits);
 }
 
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-({                                                                         \
-        int __changed = !pte_same(*(__ptep), __entry);                     \
-        if (__changed) {                                                   \
-                __ptep_set_access_flags(__ptep, __entry, __dirty);         \
-                flush_tlb_page_nohash(__vma, __address);                   \
-        }                                                                  \
-        __changed;                                                         \
-})
-
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)   (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
 
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index b0f18be81d9..c627877fcf1 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -125,6 +125,8 @@
 #define _PTEIDX_SECONDARY       0x8
 #define _PTEIDX_GROUP_IX        0x7
 
+/* To make some generic powerpc code happy */
+#define _PAGE_HWEXEC    0
 
 /*
  * POWER4 and newer have per page execute protection, older chips can only
@@ -285,6 +287,10 @@ static inline unsigned long pte_update(struct mm_struct *mm,
         : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY)
         : "cc" );
 
+        /* huge pages use the old page table lock */
+        if (!huge)
+                assert_pte_locked(mm, addr);
+
         if (old & _PAGE_HASHPTE)
                 hpte_need_flush(mm, addr, ptep, old, huge);
         return old;
@@ -359,23 +365,11 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
         pte_update(mm, addr, ptep, ~0UL, 0);
 }
 
-/*
- * set_pte stores a linux PTE into the linux page table.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-                              pte_t *ptep, pte_t pte)
-{
-        if (pte_present(*ptep))
-                pte_clear(mm, addr, ptep);
-        pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
-        *ptep = pte;
-}
 
 /* Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to flush the hash entry
  */
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
         unsigned long bits = pte_val(entry) &
                 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
@@ -392,15 +386,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
392 :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY) 386 :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
393 :"cc"); 387 :"cc");
394} 388}
395#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
396({ \
397 int __changed = !pte_same(*(__ptep), __entry); \
398 if (__changed) { \
399 __ptep_set_access_flags(__ptep, __entry, __dirty); \
400 flush_tlb_page_nohash(__vma, __address); \
401 } \
402 __changed; \
403})
404 389
405#define __HAVE_ARCH_PTE_SAME 390#define __HAVE_ARCH_PTE_SAME
406#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) 391#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 07f55e60169..5c1c4880723 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -6,7 +6,17 @@
 #include <asm/processor.h>              /* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
+
 struct mm_struct;
+
+#ifdef CONFIG_DEBUG_VM
+extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr);
+#else /* CONFIG_DEBUG_VM */
+static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+}
+#endif /* !CONFIG_DEBUG_VM */
+
 #endif /* !__ASSEMBLY__ */
 
 #if defined(CONFIG_PPC64)
@@ -17,6 +27,80 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
+/* Insert a PTE, top-level function is out of line. It uses an inline
+ * low level function in the respective pgtable-* files
+ */
+extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+                       pte_t pte);
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+                                pte_t *ptep, pte_t pte, int percpu)
+{
+#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
+        /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
+         * helper pte_update() which does an atomic update. We need to do that
+         * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+         * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
+         * the hash bits instead (ie, same as the non-SMP case)
+         */
+        if (percpu)
+                *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+                              | (pte_val(pte) & ~_PAGE_HASHPTE));
+        else
+                pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP)
+        /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we
+         * can just store as long as we do the two halves in the right order
+         * with a barrier in between. This is possible because we take care,
+         * in the hash code, to pre-invalidate if the PTE was already hashed,
+         * which synchronizes us with any concurrent invalidation.
+         * In the percpu case, we also fallback to the simple update preserving
+         * the hash bits
+         */
+        if (percpu) {
+                *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+                              | (pte_val(pte) & ~_PAGE_HASHPTE));
+                return;
+        }
+#if _PAGE_HASHPTE != 0
+        if (pte_val(*ptep) & _PAGE_HASHPTE)
+                flush_hash_entry(mm, ptep, addr);
+#endif
+        __asm__ __volatile__("\
+                stw%U0%X0 %2,%0\n\
+                eieio\n\
+                stw%U0%X0 %L2,%1"
+        : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+        : "r" (pte) : "memory");
+
+#elif defined(CONFIG_PPC_STD_MMU_32)
+        /* Third case is 32-bit hash table in UP mode, we need to preserve
+         * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+         * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+         * and see we need to keep track that this PTE needs invalidating
+         */
+        *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+                      | (pte_val(pte) & ~_PAGE_HASHPTE));
+
+#else
+        /* Anything else just stores the PTE normally. That covers all 64-bit
+         * cases, and 32-bit non-hash with 64-bit PTEs in UP mode
+         */
+        *ptep = pte;
+#endif
+}
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+                                 pte_t *ptep, pte_t entry, int dirty);
+
 /*
  * Macro to mark a page protection value as "uncacheable".
  */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b8636b8..76993941cac 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,45 +253,33 @@ good_area:
 #endif /* CONFIG_8xx */
 
         if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-                /* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+                /* Protection fault on exec go straight to failure on
+                 * Hash based MMUs as they either don't support per-page
+                 * execute permission, or if they do, it's handled already
+                 * at the hash level. This test would probably have to
+                 * be removed if we change the way this works to make hash
+                 * processors use the same I/D cache coherency mechanism
+                 * as embedded.
+                 */
                 if (error_code & DSISR_PROTFAULT)
                         goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
                 /*
                  * Allow execution from readable areas if the MMU does not
                  * provide separate controls over reading and executing.
+                 *
+                 * Note: That code used to not be enabled for 4xx/BookE.
+                 * It is now as I/D cache coherency for these is done at
+                 * set_pte_at() time and I see no reason why the test
+                 * below wouldn't be valid on those processors. This -may-
+                 * break programs compiled with a really old ABI though.
                  */
                 if (!(vma->vm_flags & VM_EXEC) &&
                     (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
                      !(vma->vm_flags & (VM_READ | VM_WRITE))))
                         goto bad_area;
-#else
-                pte_t *ptep;
-                pmd_t *pmdp;
-
-                /* Since 4xx/Book-E supports per-page execute permission,
-                 * we lazily flush dcache to icache. */
-                ptep = NULL;
-                if (get_pteptr(mm, address, &ptep, &pmdp)) {
-                        spinlock_t *ptl = pte_lockptr(mm, pmdp);
-                        spin_lock(ptl);
-                        if (pte_present(*ptep)) {
-                                struct page *page = pte_page(*ptep);
-
-                                if (!test_bit(PG_arch_1, &page->flags)) {
-                                        flush_dcache_icache_page(page);
-                                        set_bit(PG_arch_1, &page->flags);
-                                }
-                                pte_update(ptep, 0, _PAGE_HWEXEC |
-                                           _PAGE_ACCESSED);
-                                local_flush_tlb_page(vma, address);
-                                pte_unmap_unlock(ptep, ptl);
-                                up_read(&mm->mmap_sem);
-                                return 0;
-                        }
-                        pte_unmap_unlock(ptep, ptl);
-                }
-#endif
         /* a write */
         } else if (is_write) {
                 if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f00f09a77f1..f668fa9ba80 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -472,40 +472,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 {
 #ifdef CONFIG_PPC_STD_MMU
         unsigned long access = 0, trap;
-#endif
-        unsigned long pfn = pte_pfn(pte);
-
-        /* handle i-cache coherency */
-        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
-            !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
-            pfn_valid(pfn)) {
-                struct page *page = pfn_to_page(pfn);
-#ifdef CONFIG_8xx
-                /* On 8xx, cache control instructions (particularly
-                 * "dcbst" from flush_dcache_icache) fault as write
-                 * operation if there is an unpopulated TLB entry
-                 * for the address in question. To workaround that,
-                 * we invalidate the TLB here, thus avoiding dcbst
-                 * misbehaviour.
-                 */
-                _tlbil_va(address, 0 /* 8xx doesn't care about PID */);
-#endif
-                /* The _PAGE_USER test should really be _PAGE_EXEC, but
-                 * older glibc versions execute some code from no-exec
-                 * pages, which for now we are supporting. If exec-only
-                 * pages are ever implemented, this will have to change.
-                 */
-                if (!PageReserved(page) && (pte_val(pte) & _PAGE_USER)
-                    && !test_bit(PG_arch_1, &page->flags)) {
-                        if (vma->vm_mm == current->active_mm) {
-                                __flush_dcache_icache((void *) address);
-                        } else
-                                flush_dcache_icache_page(page);
-                        set_bit(PG_arch_1, &page->flags);
-                }
-        }
 
-#ifdef CONFIG_PPC_STD_MMU
         /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
         if (!pte_young(pte) || address >= TASK_SIZE)
                 return;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 6d94116fdea..a27ded3adac 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -1,5 +1,6 @@
 /*
  * This file contains common routines for dealing with free of page tables
+ * Along with common page table handling code
  *
  * Derived from arch/powerpc/mm/tlb_64.c:
  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -115,3 +116,133 @@ void pte_free_finish(void)
                 pte_free_submit(*batchp);
         *batchp = NULL;
 }
+
+/*
+ * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
+ */
+static pte_t do_dcache_icache_coherency(pte_t pte)
+{
+        unsigned long pfn = pte_pfn(pte);
+        struct page *page;
+
+        if (unlikely(!pfn_valid(pfn)))
+                return pte;
+        page = pfn_to_page(pfn);
+
+        if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
+                pr_debug("do_dcache_icache_coherency... flushing\n");
+                flush_dcache_icache_page(page);
+                set_bit(PG_arch_1, &page->flags);
+        }
+        else
+                pr_debug("do_dcache_icache_coherency... already clean\n");
+        return __pte(pte_val(pte) | _PAGE_HWEXEC);
+}
+
+static inline int is_exec_fault(void)
+{
+        return current->thread.regs && TRAP(current->thread.regs) == 0x400;
+}
+
+/* We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+        return (pte_val(pte) &
+                (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
+                (_PAGE_PRESENT);
+}
+
+#if defined(CONFIG_PPC_STD_MMU)
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supposed per page (currently 64-bit only). Else, we always flush
+ * valid PTEs in set_pte.
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+        return set_pte && pte_looks_normal(pte) &&
+                !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+                  cpu_has_feature(CPU_FTR_NOEXECUTE));
+}
+#elif _PAGE_HWEXEC == 0
+/* Embedded type MMU without HW exec support (8xx only so far), we flush
+ * the cache for any present PTE
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+        return set_pte && pte_looks_normal(pte);
+}
+#else
+/* Other embedded CPUs with HW exec support per-page, we flush on exec
+ * fault if HWEXEC is not set
+ */
+static inline int pte_need_exec_flush(pte_t pte, int set_pte)
+{
+        return pte_looks_normal(pte) && is_exec_fault() &&
+                !(pte_val(pte) & _PAGE_HWEXEC);
+}
+#endif
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_DEBUG_VM
+        WARN_ON(pte_present(*ptep));
+#endif
+        /* Note: mm->context.id might not yet have been assigned as
+         * this context might not have been activated yet when this
+         * is called.
+         */
+        pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+        if (pte_need_exec_flush(pte, 1))
+                pte = do_dcache_icache_coherency(pte);
+
+        /* Perform the setting of the PTE */
+        __set_pte_at(mm, addr, ptep, pte, 0);
+}
+
+/*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+                          pte_t *ptep, pte_t entry, int dirty)
+{
+        int changed;
+        if (!dirty && pte_need_exec_flush(entry, 0))
+                entry = do_dcache_icache_coherency(entry);
+        changed = !pte_same(*(ptep), entry);
+        if (changed) {
+                assert_pte_locked(vma->vm_mm, address);
+                __ptep_set_access_flags(ptep, entry);
+                flush_tlb_page_nohash(vma, address);
+        }
+        return changed;
+}
+
+#ifdef CONFIG_DEBUG_VM
+void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+
+        if (mm == &init_mm)
+                return;
+        pgd = mm->pgd + pgd_index(addr);
+        BUG_ON(pgd_none(*pgd));
+        pud = pud_offset(pgd, addr);
+        BUG_ON(pud_none(*pud));
+        pmd = pmd_offset(pud, addr);
+        BUG_ON(!pmd_present(*pmd));
+        BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd)));
+}
+#endif /* CONFIG_DEBUG_VM */
+