diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-08-18 15:00:34 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-08-26 23:12:51 -0400 |
commit | ea3cc330ac0cd521ff07c7cd432a1848c19a7e92 (patch) | |
tree | 82f3e84e28ebf5ae33d05ec0142c22b30a21c60a /arch/powerpc/mm | |
parent | f480fe3916de2e2cbb6e384cb685f0f1d8272188 (diff) |
powerpc/mm: Cleanup handling of execute permission
This is an attempt at cleaning up a bit the way we handle execute
permission on powerpc. _PAGE_HWEXEC is gone, _PAGE_EXEC is now only
defined by CPUs that can do something with it, and the myriad of
#ifdef's in the I$/D$ coherency code is reduced to 2 cases that
hopefully should cover everything.
The logic on BookE is a little bit different than what it was though
not by much. Since now, _PAGE_EXEC will be set by the generic code
for executable pages, we need to filter it out for pages that are
unclean and recover it later. However, I don't expect the code to be more bloated than
it already was in that area due to that change.
I could boast that this brings proper enforcing of per-page execute
permissions to all BookE and 40x but in fact, we've had that now for
some time as a side effect of my previous rework in that area (and
I didn't even know it :-) We would only enable execute permission if
the page was cache clean and we would only cache clean it if we took
an exec fault. Since we now enforce that the latter only works if
VM_EXEC is part of the VMA flags, we de facto already enforce per-page
execute permissions... unless I missed something.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/40x_mmu.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 167 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 4 |
4 files changed, 121 insertions, 56 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 29954dc2894..f5e7b9ce63d 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c | |||
@@ -105,7 +105,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
105 | 105 | ||
106 | while (s >= LARGE_PAGE_SIZE_16M) { | 106 | while (s >= LARGE_PAGE_SIZE_16M) { |
107 | pmd_t *pmdp; | 107 | pmd_t *pmdp; |
108 | unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; | 108 | unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; |
109 | 109 | ||
110 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); | 110 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); |
111 | pmd_val(*pmdp++) = val; | 111 | pmd_val(*pmdp++) = val; |
@@ -120,7 +120,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
120 | 120 | ||
121 | while (s >= LARGE_PAGE_SIZE_4M) { | 121 | while (s >= LARGE_PAGE_SIZE_4M) { |
122 | pmd_t *pmdp; | 122 | pmd_t *pmdp; |
123 | unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; | 123 | unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; |
124 | 124 | ||
125 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); | 125 | pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); |
126 | pmd_val(*pmdp) = val; | 126 | pmd_val(*pmdp) = val; |
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index b6b32487e74..83f1551ec2c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -128,28 +128,6 @@ void pte_free_finish(void) | |||
128 | 128 | ||
129 | #endif /* CONFIG_SMP */ | 129 | #endif /* CONFIG_SMP */ |
130 | 130 | ||
131 | /* | ||
132 | * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags() | ||
133 | */ | ||
134 | static pte_t do_dcache_icache_coherency(pte_t pte) | ||
135 | { | ||
136 | unsigned long pfn = pte_pfn(pte); | ||
137 | struct page *page; | ||
138 | |||
139 | if (unlikely(!pfn_valid(pfn))) | ||
140 | return pte; | ||
141 | page = pfn_to_page(pfn); | ||
142 | |||
143 | if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) { | ||
144 | pr_devel("do_dcache_icache_coherency... flushing\n"); | ||
145 | flush_dcache_icache_page(page); | ||
146 | set_bit(PG_arch_1, &page->flags); | ||
147 | } | ||
148 | else | ||
149 | pr_devel("do_dcache_icache_coherency... already clean\n"); | ||
150 | return __pte(pte_val(pte) | _PAGE_HWEXEC); | ||
151 | } | ||
152 | |||
153 | static inline int is_exec_fault(void) | 131 | static inline int is_exec_fault(void) |
154 | { | 132 | { |
155 | return current->thread.regs && TRAP(current->thread.regs) == 0x400; | 133 | return current->thread.regs && TRAP(current->thread.regs) == 0x400; |
@@ -157,49 +135,139 @@ static inline int is_exec_fault(void) | |||
157 | 135 | ||
158 | /* We only try to do i/d cache coherency on stuff that looks like | 136 | /* We only try to do i/d cache coherency on stuff that looks like |
159 | * reasonably "normal" PTEs. We currently require a PTE to be present | 137 | * reasonably "normal" PTEs. We currently require a PTE to be present |
160 | * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE | 138 | * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that |
139 | * on userspace PTEs | ||
161 | */ | 140 | */ |
162 | static inline int pte_looks_normal(pte_t pte) | 141 | static inline int pte_looks_normal(pte_t pte) |
163 | { | 142 | { |
164 | return (pte_val(pte) & | 143 | return (pte_val(pte) & |
165 | (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == | 144 | (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == |
166 | (_PAGE_PRESENT); | 145 | (_PAGE_PRESENT | _PAGE_USER); |
167 | } | 146 | } |
168 | 147 | ||
169 | #if defined(CONFIG_PPC_STD_MMU) | 148 | struct page * maybe_pte_to_page(pte_t pte) |
149 | { | ||
150 | unsigned long pfn = pte_pfn(pte); | ||
151 | struct page *page; | ||
152 | |||
153 | if (unlikely(!pfn_valid(pfn))) | ||
154 | return NULL; | ||
155 | page = pfn_to_page(pfn); | ||
156 | if (PageReserved(page)) | ||
157 | return NULL; | ||
158 | return page; | ||
159 | } | ||
160 | |||
161 | #if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 | ||
162 | |||
170 | /* Server-style MMU handles coherency when hashing if HW exec permission | 163 | /* Server-style MMU handles coherency when hashing if HW exec permission |
171 | * is supposed per page (currently 64-bit only). Else, we always flush | 164 | * is supposed per page (currently 64-bit only). If not, then, we always |
172 | * valid PTEs in set_pte. | 165 | * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec |
166 | * support falls into the same category. | ||
173 | */ | 167 | */ |
174 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | 168 | |
169 | static pte_t set_pte_filter(pte_t pte) | ||
175 | { | 170 | { |
176 | return set_pte && pte_looks_normal(pte) && | 171 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); |
177 | !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || | 172 | if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || |
178 | cpu_has_feature(CPU_FTR_NOEXECUTE)); | 173 | cpu_has_feature(CPU_FTR_NOEXECUTE))) { |
174 | struct page *pg = maybe_pte_to_page(pte); | ||
175 | if (!pg) | ||
176 | return pte; | ||
177 | if (!test_bit(PG_arch_1, &pg->flags)) { | ||
178 | flush_dcache_icache_page(pg); | ||
179 | set_bit(PG_arch_1, &pg->flags); | ||
180 | } | ||
181 | } | ||
182 | return pte; | ||
179 | } | 183 | } |
180 | #elif _PAGE_HWEXEC == 0 | 184 | |
181 | /* Embedded type MMU without HW exec support (8xx only so far), we flush | 185 | static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, |
182 | * the cache for any present PTE | 186 | int dirty) |
183 | */ | ||
184 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | ||
185 | { | 187 | { |
186 | return set_pte && pte_looks_normal(pte); | 188 | return pte; |
187 | } | 189 | } |
188 | #else | 190 | |
189 | /* Other embedded CPUs with HW exec support per-page, we flush on exec | 191 | #else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ |
190 | * fault if HWEXEC is not set | 192 | |
193 | /* Embedded type MMU with HW exec support. This is a bit more complicated | ||
194 | * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so | ||
195 | * instead we "filter out" the exec permission for non clean pages. | ||
191 | */ | 196 | */ |
192 | static inline int pte_need_exec_flush(pte_t pte, int set_pte) | 197 | static pte_t set_pte_filter(pte_t pte) |
193 | { | 198 | { |
194 | return pte_looks_normal(pte) && is_exec_fault() && | 199 | struct page *pg; |
195 | !(pte_val(pte) & _PAGE_HWEXEC); | 200 | |
201 | /* No exec permission in the first place, move on */ | ||
202 | if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) | ||
203 | return pte; | ||
204 | |||
205 | /* If you set _PAGE_EXEC on weird pages you're on your own */ | ||
206 | pg = maybe_pte_to_page(pte); | ||
207 | if (unlikely(!pg)) | ||
208 | return pte; | ||
209 | |||
210 | /* If the page clean, we move on */ | ||
211 | if (test_bit(PG_arch_1, &pg->flags)) | ||
212 | return pte; | ||
213 | |||
214 | /* If it's an exec fault, we flush the cache and make it clean */ | ||
215 | if (is_exec_fault()) { | ||
216 | flush_dcache_icache_page(pg); | ||
217 | set_bit(PG_arch_1, &pg->flags); | ||
218 | return pte; | ||
219 | } | ||
220 | |||
221 | /* Else, we filter out _PAGE_EXEC */ | ||
222 | return __pte(pte_val(pte) & ~_PAGE_EXEC); | ||
196 | } | 223 | } |
197 | #endif | 224 | |
225 | static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, | ||
226 | int dirty) | ||
227 | { | ||
228 | struct page *pg; | ||
229 | |||
230 | /* So here, we only care about exec faults, as we use them | ||
231 | * to recover lost _PAGE_EXEC and perform I$/D$ coherency | ||
232 | * if necessary. Also if _PAGE_EXEC is already set, same deal, | ||
233 | * we just bail out | ||
234 | */ | ||
235 | if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) | ||
236 | return pte; | ||
237 | |||
238 | #ifdef CONFIG_DEBUG_VM | ||
239 | /* So this is an exec fault, _PAGE_EXEC is not set. If it was | ||
240 | * an error we would have bailed out earlier in do_page_fault() | ||
241 | * but let's make sure of it | ||
242 | */ | ||
243 | if (WARN_ON(!(vma->vm_flags & VM_EXEC))) | ||
244 | return pte; | ||
245 | #endif /* CONFIG_DEBUG_VM */ | ||
246 | |||
247 | /* If you set _PAGE_EXEC on weird pages you're on your own */ | ||
248 | pg = maybe_pte_to_page(pte); | ||
249 | if (unlikely(!pg)) | ||
250 | goto bail; | ||
251 | |||
252 | /* If the page is already clean, we move on */ | ||
253 | if (test_bit(PG_arch_1, &pg->flags)) | ||
254 | goto bail; | ||
255 | |||
256 | /* Clean the page and set PG_arch_1 */ | ||
257 | flush_dcache_icache_page(pg); | ||
258 | set_bit(PG_arch_1, &pg->flags); | ||
259 | |||
260 | bail: | ||
261 | return __pte(pte_val(pte) | _PAGE_EXEC); | ||
262 | } | ||
263 | |||
264 | #endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ | ||
198 | 265 | ||
199 | /* | 266 | /* |
200 | * set_pte stores a linux PTE into the linux page table. | 267 | * set_pte stores a linux PTE into the linux page table. |
201 | */ | 268 | */ |
202 | void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 269 | void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, |
270 | pte_t pte) | ||
203 | { | 271 | { |
204 | #ifdef CONFIG_DEBUG_VM | 272 | #ifdef CONFIG_DEBUG_VM |
205 | WARN_ON(pte_present(*ptep)); | 273 | WARN_ON(pte_present(*ptep)); |
@@ -208,9 +276,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte | |||
208 | * this context might not have been activated yet when this | 276 | * this context might not have been activated yet when this |
209 | * is called. | 277 | * is called. |
210 | */ | 278 | */ |
211 | pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | 279 | pte = set_pte_filter(pte); |
212 | if (pte_need_exec_flush(pte, 1)) | ||
213 | pte = do_dcache_icache_coherency(pte); | ||
214 | 280 | ||
215 | /* Perform the setting of the PTE */ | 281 | /* Perform the setting of the PTE */ |
216 | __set_pte_at(mm, addr, ptep, pte, 0); | 282 | __set_pte_at(mm, addr, ptep, pte, 0); |
@@ -227,8 +293,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, | |||
227 | pte_t *ptep, pte_t entry, int dirty) | 293 | pte_t *ptep, pte_t entry, int dirty) |
228 | { | 294 | { |
229 | int changed; | 295 | int changed; |
230 | if (!dirty && pte_need_exec_flush(entry, 0)) | 296 | entry = set_access_flags_filter(entry, vma, dirty); |
231 | entry = do_dcache_icache_coherency(entry); | ||
232 | changed = !pte_same(*(ptep), entry); | 297 | changed = !pte_same(*(ptep), entry); |
233 | if (changed) { | 298 | if (changed) { |
234 | if (!(vma->vm_flags & VM_HUGETLB)) | 299 | if (!(vma->vm_flags & VM_HUGETLB)) |
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5422169626b..cb96cb2e17c 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c | |||
@@ -142,7 +142,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) | |||
142 | flags |= _PAGE_DIRTY | _PAGE_HWWRITE; | 142 | flags |= _PAGE_DIRTY | _PAGE_HWWRITE; |
143 | 143 | ||
144 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ | 144 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ |
145 | flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); | 145 | flags &= ~(_PAGE_USER | _PAGE_EXEC); |
146 | 146 | ||
147 | return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); | 147 | return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); |
148 | } | 148 | } |
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 10d524ded7b..cd92f62f9cf 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S | |||
@@ -133,7 +133,7 @@ | |||
133 | 133 | ||
134 | /* We do the user/kernel test for the PID here along with the RW test | 134 | /* We do the user/kernel test for the PID here along with the RW test |
135 | */ | 135 | */ |
136 | li r11,_PAGE_PRESENT|_PAGE_HWEXEC /* Base perm */ | 136 | li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ |
137 | oris r11,r11,_PAGE_ACCESSED@h | 137 | oris r11,r11,_PAGE_ACCESSED@h |
138 | 138 | ||
139 | cmpldi cr0,r15,0 /* Check for user region */ | 139 | cmpldi cr0,r15,0 /* Check for user region */ |
@@ -256,7 +256,7 @@ normal_tlb_miss_done: | |||
256 | 256 | ||
257 | normal_tlb_miss_access_fault: | 257 | normal_tlb_miss_access_fault: |
258 | /* We need to check if it was an instruction miss */ | 258 | /* We need to check if it was an instruction miss */ |
259 | andi. r10,r11,_PAGE_HWEXEC | 259 | andi. r10,r11,_PAGE_EXEC |
260 | bne 1f | 260 | bne 1f |
261 | ld r14,EX_TLB_DEAR(r12) | 261 | ld r14,EX_TLB_DEAR(r12) |
262 | ld r15,EX_TLB_ESR(r12) | 262 | ld r15,EX_TLB_ESR(r12) |