aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-08-18 15:00:34 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-08-26 23:12:51 -0400
commitea3cc330ac0cd521ff07c7cd432a1848c19a7e92 (patch)
tree82f3e84e28ebf5ae33d05ec0142c22b30a21c60a /arch/powerpc/mm
parentf480fe3916de2e2cbb6e384cb685f0f1d8272188 (diff)
powerpc/mm: Cleanup handling of execute permission
This is an attempt at cleaning up a bit the way we handle execute permission on powerpc. _PAGE_HWEXEC is gone, _PAGE_EXEC is now only defined by CPUs that can do something with it, and the myriad of #ifdef's in the I$/D$ coherency code is reduced to 2 cases that hopefully should cover everything. The logic on BookE is a little bit different than what it was though not by much. Since now, _PAGE_EXEC will be set by the generic code for executable pages, we need to filter out if they are unclean and recover it. However, I don't expect the code to be more bloated than it already was in that area due to that change. I could boast that this brings proper enforcing of per-page execute permissions to all BookE and 40x but in fact, we've had that now for some time as a side effect of my previous rework in that area (and I didn't even know it :-) We would only enable execute permission if the page was cache clean and we would only cache clean it if we took and exec fault. Since we now enforce that the later only work if VM_EXEC is part of the VMA flags, we de-fact already enforce per-page execute permissions... Unless I missed something Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/40x_mmu.c4
-rw-r--r--arch/powerpc/mm/pgtable.c167
-rw-r--r--arch/powerpc/mm/pgtable_32.c2
-rw-r--r--arch/powerpc/mm/tlb_low_64e.S4
4 files changed, 121 insertions, 56 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 29954dc2894..f5e7b9ce63d 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -105,7 +105,7 @@ unsigned long __init mmu_mapin_ram(void)
105 105
106 while (s >= LARGE_PAGE_SIZE_16M) { 106 while (s >= LARGE_PAGE_SIZE_16M) {
107 pmd_t *pmdp; 107 pmd_t *pmdp;
108 unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; 108 unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
109 109
110 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); 110 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
111 pmd_val(*pmdp++) = val; 111 pmd_val(*pmdp++) = val;
@@ -120,7 +120,7 @@ unsigned long __init mmu_mapin_ram(void)
120 120
121 while (s >= LARGE_PAGE_SIZE_4M) { 121 while (s >= LARGE_PAGE_SIZE_4M) {
122 pmd_t *pmdp; 122 pmd_t *pmdp;
123 unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; 123 unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
124 124
125 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); 125 pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
126 pmd_val(*pmdp) = val; 126 pmd_val(*pmdp) = val;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index b6b32487e74..83f1551ec2c 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -128,28 +128,6 @@ void pte_free_finish(void)
128 128
129#endif /* CONFIG_SMP */ 129#endif /* CONFIG_SMP */
130 130
131/*
132 * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
133 */
134static pte_t do_dcache_icache_coherency(pte_t pte)
135{
136 unsigned long pfn = pte_pfn(pte);
137 struct page *page;
138
139 if (unlikely(!pfn_valid(pfn)))
140 return pte;
141 page = pfn_to_page(pfn);
142
143 if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
144 pr_devel("do_dcache_icache_coherency... flushing\n");
145 flush_dcache_icache_page(page);
146 set_bit(PG_arch_1, &page->flags);
147 }
148 else
149 pr_devel("do_dcache_icache_coherency... already clean\n");
150 return __pte(pte_val(pte) | _PAGE_HWEXEC);
151}
152
153static inline int is_exec_fault(void) 131static inline int is_exec_fault(void)
154{ 132{
155 return current->thread.regs && TRAP(current->thread.regs) == 0x400; 133 return current->thread.regs && TRAP(current->thread.regs) == 0x400;
@@ -157,49 +135,139 @@ static inline int is_exec_fault(void)
157 135
158/* We only try to do i/d cache coherency on stuff that looks like 136/* We only try to do i/d cache coherency on stuff that looks like
159 * reasonably "normal" PTEs. We currently require a PTE to be present 137 * reasonably "normal" PTEs. We currently require a PTE to be present
160 * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE 138 * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
139 * on userspace PTEs
161 */ 140 */
162static inline int pte_looks_normal(pte_t pte) 141static inline int pte_looks_normal(pte_t pte)
163{ 142{
164 return (pte_val(pte) & 143 return (pte_val(pte) &
165 (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == 144 (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
166 (_PAGE_PRESENT); 145 (_PAGE_PRESENT | _PAGE_USER);
167} 146}
168 147
169#if defined(CONFIG_PPC_STD_MMU) 148struct page * maybe_pte_to_page(pte_t pte)
149{
150 unsigned long pfn = pte_pfn(pte);
151 struct page *page;
152
153 if (unlikely(!pfn_valid(pfn)))
154 return NULL;
155 page = pfn_to_page(pfn);
156 if (PageReserved(page))
157 return NULL;
158 return page;
159}
160
161#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
162
170/* Server-style MMU handles coherency when hashing if HW exec permission 163/* Server-style MMU handles coherency when hashing if HW exec permission
171 * is supposed per page (currently 64-bit only). Else, we always flush 164 * is supposed per page (currently 64-bit only). If not, then, we always
172 * valid PTEs in set_pte. 165 * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec
166 * support falls into the same category.
173 */ 167 */
174static inline int pte_need_exec_flush(pte_t pte, int set_pte) 168
169static pte_t set_pte_filter(pte_t pte)
175{ 170{
176 return set_pte && pte_looks_normal(pte) && 171 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
177 !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || 172 if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
178 cpu_has_feature(CPU_FTR_NOEXECUTE)); 173 cpu_has_feature(CPU_FTR_NOEXECUTE))) {
174 struct page *pg = maybe_pte_to_page(pte);
175 if (!pg)
176 return pte;
177 if (!test_bit(PG_arch_1, &pg->flags)) {
178 flush_dcache_icache_page(pg);
179 set_bit(PG_arch_1, &pg->flags);
180 }
181 }
182 return pte;
179} 183}
180#elif _PAGE_HWEXEC == 0 184
181/* Embedded type MMU without HW exec support (8xx only so far), we flush 185static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
182 * the cache for any present PTE 186 int dirty)
183 */
184static inline int pte_need_exec_flush(pte_t pte, int set_pte)
185{ 187{
186 return set_pte && pte_looks_normal(pte); 188 return pte;
187} 189}
188#else 190
189/* Other embedded CPUs with HW exec support per-page, we flush on exec 191#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
190 * fault if HWEXEC is not set 192
193/* Embedded type MMU with HW exec support. This is a bit more complicated
194 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
195 * instead we "filter out" the exec permission for non clean pages.
191 */ 196 */
192static inline int pte_need_exec_flush(pte_t pte, int set_pte) 197static pte_t set_pte_filter(pte_t pte)
193{ 198{
194 return pte_looks_normal(pte) && is_exec_fault() && 199 struct page *pg;
195 !(pte_val(pte) & _PAGE_HWEXEC); 200
201 /* No exec permission in the first place, move on */
202 if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
203 return pte;
204
205 /* If you set _PAGE_EXEC on weird pages you're on your own */
206 pg = maybe_pte_to_page(pte);
207 if (unlikely(!pg))
208 return pte;
209
210 /* If the page clean, we move on */
211 if (test_bit(PG_arch_1, &pg->flags))
212 return pte;
213
214 /* If it's an exec fault, we flush the cache and make it clean */
215 if (is_exec_fault()) {
216 flush_dcache_icache_page(pg);
217 set_bit(PG_arch_1, &pg->flags);
218 return pte;
219 }
220
221 /* Else, we filter out _PAGE_EXEC */
222 return __pte(pte_val(pte) & ~_PAGE_EXEC);
196} 223}
197#endif 224
225static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
226 int dirty)
227{
228 struct page *pg;
229
230 /* So here, we only care about exec faults, as we use them
231 * to recover lost _PAGE_EXEC and perform I$/D$ coherency
232 * if necessary. Also if _PAGE_EXEC is already set, same deal,
233 * we just bail out
234 */
235 if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
236 return pte;
237
238#ifdef CONFIG_DEBUG_VM
239 /* So this is an exec fault, _PAGE_EXEC is not set. If it was
240 * an error we would have bailed out earlier in do_page_fault()
241 * but let's make sure of it
242 */
243 if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
244 return pte;
245#endif /* CONFIG_DEBUG_VM */
246
247 /* If you set _PAGE_EXEC on weird pages you're on your own */
248 pg = maybe_pte_to_page(pte);
249 if (unlikely(!pg))
250 goto bail;
251
252 /* If the page is already clean, we move on */
253 if (test_bit(PG_arch_1, &pg->flags))
254 goto bail;
255
256 /* Clean the page and set PG_arch_1 */
257 flush_dcache_icache_page(pg);
258 set_bit(PG_arch_1, &pg->flags);
259
260 bail:
261 return __pte(pte_val(pte) | _PAGE_EXEC);
262}
263
264#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
198 265
199/* 266/*
200 * set_pte stores a linux PTE into the linux page table. 267 * set_pte stores a linux PTE into the linux page table.
201 */ 268 */
202void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 269void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
270 pte_t pte)
203{ 271{
204#ifdef CONFIG_DEBUG_VM 272#ifdef CONFIG_DEBUG_VM
205 WARN_ON(pte_present(*ptep)); 273 WARN_ON(pte_present(*ptep));
@@ -208,9 +276,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte
208 * this context might not have been activated yet when this 276 * this context might not have been activated yet when this
209 * is called. 277 * is called.
210 */ 278 */
211 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 279 pte = set_pte_filter(pte);
212 if (pte_need_exec_flush(pte, 1))
213 pte = do_dcache_icache_coherency(pte);
214 280
215 /* Perform the setting of the PTE */ 281 /* Perform the setting of the PTE */
216 __set_pte_at(mm, addr, ptep, pte, 0); 282 __set_pte_at(mm, addr, ptep, pte, 0);
@@ -227,8 +293,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
227 pte_t *ptep, pte_t entry, int dirty) 293 pte_t *ptep, pte_t entry, int dirty)
228{ 294{
229 int changed; 295 int changed;
230 if (!dirty && pte_need_exec_flush(entry, 0)) 296 entry = set_access_flags_filter(entry, vma, dirty);
231 entry = do_dcache_icache_coherency(entry);
232 changed = !pte_same(*(ptep), entry); 297 changed = !pte_same(*(ptep), entry);
233 if (changed) { 298 if (changed) {
234 if (!(vma->vm_flags & VM_HUGETLB)) 299 if (!(vma->vm_flags & VM_HUGETLB))
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 5422169626b..cb96cb2e17c 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -142,7 +142,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
142 flags |= _PAGE_DIRTY | _PAGE_HWWRITE; 142 flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
143 143
144 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ 144 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
145 flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); 145 flags &= ~(_PAGE_USER | _PAGE_EXEC);
146 146
147 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 147 return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
148} 148}
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 10d524ded7b..cd92f62f9cf 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -133,7 +133,7 @@
133 133
134 /* We do the user/kernel test for the PID here along with the RW test 134 /* We do the user/kernel test for the PID here along with the RW test
135 */ 135 */
136 li r11,_PAGE_PRESENT|_PAGE_HWEXEC /* Base perm */ 136 li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
137 oris r11,r11,_PAGE_ACCESSED@h 137 oris r11,r11,_PAGE_ACCESSED@h
138 138
139 cmpldi cr0,r15,0 /* Check for user region */ 139 cmpldi cr0,r15,0 /* Check for user region */
@@ -256,7 +256,7 @@ normal_tlb_miss_done:
256 256
257normal_tlb_miss_access_fault: 257normal_tlb_miss_access_fault:
258 /* We need to check if it was an instruction miss */ 258 /* We need to check if it was an instruction miss */
259 andi. r10,r11,_PAGE_HWEXEC 259 andi. r10,r11,_PAGE_EXEC
260 bne 1f 260 bne 1f
261 ld r14,EX_TLB_DEAR(r12) 261 ld r14,EX_TLB_DEAR(r12)
262 ld r15,EX_TLB_ESR(r12) 262 ld r15,EX_TLB_ESR(r12)