diff options
author | Paul Mundt <lethal@linux-sh.org> | 2006-09-27 05:30:07 -0400 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2006-09-27 05:30:07 -0400 |
commit | 28ccf7f91b1ac42ee1f18480a69d2a7486b625ce (patch) | |
tree | 456f8287e15c59e8a1f98a49932c4ecf85419227 | |
parent | d15f456043175bdf3464514b92a825b88d0546ae (diff) |
sh: Selective flush_cache_mm() flushing.
flush_cache_mm() wraps into flush_cache_all(), which is rather
excessive given that the number of PTEs within the specified context
is generally quite low. Optimize this by walking the mm's VMA list and
selectively flushing the VMA ranges from the dcache. Invalidate the
icache only if a VMA sets VM_EXEC.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r-- | arch/sh/mm/cache-sh4.c | 220 |
1 files changed, 130 insertions, 90 deletions
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 2203bd6aadb3..aa4f62f0e374 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c | |||
@@ -2,29 +2,31 @@ | |||
2 | * arch/sh/mm/cache-sh4.c | 2 | * arch/sh/mm/cache-sh4.c |
3 | * | 3 | * |
4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka | 4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka |
5 | * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt | 5 | * Copyright (C) 2001 - 2006 Paul Mundt |
6 | * Copyright (C) 2003 Richard Curnow | 6 | * Copyright (C) 2003 Richard Curnow |
7 | * | 7 | * |
8 | * This file is subject to the terms and conditions of the GNU General Public | 8 | * This file is subject to the terms and conditions of the GNU General Public |
9 | * License. See the file "COPYING" in the main directory of this archive | 9 | * License. See the file "COPYING" in the main directory of this archive |
10 | * for more details. | 10 | * for more details. |
11 | */ | 11 | */ |
12 | |||
13 | #include <linux/init.h> | 12 | #include <linux/init.h> |
14 | #include <linux/mman.h> | ||
15 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
16 | #include <linux/threads.h> | ||
17 | #include <asm/addrspace.h> | 14 | #include <asm/addrspace.h> |
18 | #include <asm/page.h> | ||
19 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
20 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
21 | #include <asm/cache.h> | 17 | #include <asm/cache.h> |
22 | #include <asm/io.h> | 18 | #include <asm/io.h> |
23 | #include <asm/uaccess.h> | ||
24 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
25 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
26 | #include <asm/cacheflush.h> | 21 | #include <asm/cacheflush.h> |
27 | 22 | ||
23 | /* | ||
24 | * The maximum number of pages we support up to when doing ranged dcache | ||
25 | * flushing. Anything exceeding this will simply flush the dcache in its | ||
26 | * entirety. | ||
27 | */ | ||
28 | #define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ | ||
29 | |||
28 | static void __flush_dcache_segment_1way(unsigned long start, | 30 | static void __flush_dcache_segment_1way(unsigned long start, |
29 | unsigned long extent); | 31 | unsigned long extent); |
30 | static void __flush_dcache_segment_2way(unsigned long start, | 32 | static void __flush_dcache_segment_2way(unsigned long start, |
@@ -219,14 +221,14 @@ void flush_cache_sigtramp(unsigned long addr) | |||
219 | static inline void flush_cache_4096(unsigned long start, | 221 | static inline void flush_cache_4096(unsigned long start, |
220 | unsigned long phys) | 222 | unsigned long phys) |
221 | { | 223 | { |
222 | unsigned long flags; | ||
223 | |||
224 | /* | 224 | /* |
225 | * All types of SH-4 require PC to be in P2 to operate on the I-cache. | 225 | * All types of SH-4 require PC to be in P2 to operate on the I-cache. |
226 | * Some types of SH-4 require PC to be in P2 to operate on the D-cache. | 226 | * Some types of SH-4 require PC to be in P2 to operate on the D-cache. |
227 | */ | 227 | */ |
228 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) | 228 | if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) || |
229 | || start < CACHE_OC_ADDRESS_ARRAY) { | 229 | (start < CACHE_OC_ADDRESS_ARRAY)) { |
230 | unsigned long flags; | ||
231 | |||
230 | local_irq_save(flags); | 232 | local_irq_save(flags); |
231 | __flush_cache_4096(start | SH_CACHE_ASSOC, | 233 | __flush_cache_4096(start | SH_CACHE_ASSOC, |
232 | P1SEGADDR(phys), 0x20000000); | 234 | P1SEGADDR(phys), 0x20000000); |
@@ -257,6 +259,7 @@ void flush_dcache_page(struct page *page) | |||
257 | wmb(); | 259 | wmb(); |
258 | } | 260 | } |
259 | 261 | ||
262 | /* TODO: Selective icache invalidation through IC address array.. */ | ||
260 | static inline void flush_icache_all(void) | 263 | static inline void flush_icache_all(void) |
261 | { | 264 | { |
262 | unsigned long flags, ccr; | 265 | unsigned long flags, ccr; |
@@ -290,19 +293,121 @@ void flush_cache_all(void) | |||
290 | flush_icache_all(); | 293 | flush_icache_all(); |
291 | } | 294 | } |
292 | 295 | ||
296 | static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, | ||
297 | unsigned long end) | ||
298 | { | ||
299 | unsigned long d = 0, p = start & PAGE_MASK; | ||
300 | unsigned long alias_mask = cpu_data->dcache.alias_mask; | ||
301 | unsigned long n_aliases = cpu_data->dcache.n_aliases; | ||
302 | unsigned long select_bit; | ||
303 | unsigned long all_aliases_mask; | ||
304 | unsigned long addr_offset; | ||
305 | pgd_t *dir; | ||
306 | pmd_t *pmd; | ||
307 | pud_t *pud; | ||
308 | pte_t *pte; | ||
309 | int i; | ||
310 | |||
311 | dir = pgd_offset(mm, p); | ||
312 | pud = pud_offset(dir, p); | ||
313 | pmd = pmd_offset(pud, p); | ||
314 | end = PAGE_ALIGN(end); | ||
315 | |||
316 | all_aliases_mask = (1 << n_aliases) - 1; | ||
317 | |||
318 | do { | ||
319 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { | ||
320 | p &= PMD_MASK; | ||
321 | p += PMD_SIZE; | ||
322 | pmd++; | ||
323 | |||
324 | continue; | ||
325 | } | ||
326 | |||
327 | pte = pte_offset_kernel(pmd, p); | ||
328 | |||
329 | do { | ||
330 | unsigned long phys; | ||
331 | pte_t entry = *pte; | ||
332 | |||
333 | if (!(pte_val(entry) & _PAGE_PRESENT)) { | ||
334 | pte++; | ||
335 | p += PAGE_SIZE; | ||
336 | continue; | ||
337 | } | ||
338 | |||
339 | phys = pte_val(entry) & PTE_PHYS_MASK; | ||
340 | |||
341 | if ((p ^ phys) & alias_mask) { | ||
342 | d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); | ||
343 | d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); | ||
344 | |||
345 | if (d == all_aliases_mask) | ||
346 | goto loop_exit; | ||
347 | } | ||
348 | |||
349 | pte++; | ||
350 | p += PAGE_SIZE; | ||
351 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | ||
352 | pmd++; | ||
353 | } while (p < end); | ||
354 | |||
355 | loop_exit: | ||
356 | addr_offset = 0; | ||
357 | select_bit = 1; | ||
358 | |||
359 | for (i = 0; i < n_aliases; i++) { | ||
360 | if (d & select_bit) { | ||
361 | (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); | ||
362 | wmb(); | ||
363 | } | ||
364 | |||
365 | select_bit <<= 1; | ||
366 | addr_offset += PAGE_SIZE; | ||
367 | } | ||
368 | } | ||
369 | |||
370 | /* | ||
371 | * Note : (RPC) since the caches are physically tagged, the only point | ||
372 | * of flush_cache_mm for SH-4 is to get rid of aliases from the | ||
373 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that | ||
374 | * lines can stay resident so long as the virtual address they were | ||
375 | * accessed with (hence cache set) is in accord with the physical | ||
376 | * address (i.e. tag). It's no different here. So I reckon we don't | ||
377 | * need to flush the I-cache, since aliases don't matter for that. We | ||
378 | * should try that. | ||
379 | * | ||
380 | * Caller takes mm->mmap_sem. | ||
381 | */ | ||
293 | void flush_cache_mm(struct mm_struct *mm) | 382 | void flush_cache_mm(struct mm_struct *mm) |
294 | { | 383 | { |
295 | /* | 384 | /* |
296 | * Note : (RPC) since the caches are physically tagged, the only point | 385 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
297 | * of flush_cache_mm for SH-4 is to get rid of aliases from the | 386 | * the cache is physically tagged, the data can just be left in there. |
298 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that | 387 | */ |
299 | * lines can stay resident so long as the virtual address they were | 388 | if (cpu_data->dcache.n_aliases == 0) |
300 | * accessed with (hence cache set) is in accord with the physical | 389 | return; |
301 | * address (i.e. tag). It's no different here. So I reckon we don't | 390 | |
302 | * need to flush the I-cache, since aliases don't matter for that. We | 391 | /* |
303 | * should try that. | 392 | * Don't bother groveling around the dcache for the VMA ranges |
393 | * if there are too many PTEs to make it worthwhile. | ||
304 | */ | 394 | */ |
305 | flush_cache_all(); | 395 | if (mm->nr_ptes >= MAX_DCACHE_PAGES) |
396 | flush_dcache_all(); | ||
397 | else { | ||
398 | struct vm_area_struct *vma; | ||
399 | |||
400 | /* | ||
401 | * In this case there are reasonably sized ranges to flush, | ||
402 | * iterate through the VMA list and take care of any aliases. | ||
403 | */ | ||
404 | for (vma = mm->mmap; vma; vma = vma->vm_next) | ||
405 | __flush_cache_mm(mm, vma->vm_start, vma->vm_end); | ||
406 | } | ||
407 | |||
408 | /* Only touch the icache if one of the VMAs has VM_EXEC set. */ | ||
409 | if (mm->exec_vm) | ||
410 | flush_icache_all(); | ||
306 | } | 411 | } |
307 | 412 | ||
308 | /* | 413 | /* |
@@ -311,7 +416,8 @@ void flush_cache_mm(struct mm_struct *mm) | |||
311 | * ADDR: Virtual Address (U0 address) | 416 | * ADDR: Virtual Address (U0 address) |
312 | * PFN: Physical page number | 417 | * PFN: Physical page number |
313 | */ | 418 | */ |
314 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) | 419 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, |
420 | unsigned long pfn) | ||
315 | { | 421 | { |
316 | unsigned long phys = pfn << PAGE_SHIFT; | 422 | unsigned long phys = pfn << PAGE_SHIFT; |
317 | unsigned int alias_mask; | 423 | unsigned int alias_mask; |
@@ -358,87 +464,22 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne | |||
358 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | 464 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, |
359 | unsigned long end) | 465 | unsigned long end) |
360 | { | 466 | { |
361 | unsigned long d = 0, p = start & PAGE_MASK; | ||
362 | unsigned long alias_mask = cpu_data->dcache.alias_mask; | ||
363 | unsigned long n_aliases = cpu_data->dcache.n_aliases; | ||
364 | unsigned long select_bit; | ||
365 | unsigned long all_aliases_mask; | ||
366 | unsigned long addr_offset; | ||
367 | unsigned long phys; | ||
368 | pgd_t *dir; | ||
369 | pmd_t *pmd; | ||
370 | pud_t *pud; | ||
371 | pte_t *pte; | ||
372 | pte_t entry; | ||
373 | int i; | ||
374 | |||
375 | /* | 467 | /* |
376 | * If cache is only 4k-per-way, there are never any 'aliases'. Since | 468 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
377 | * the cache is physically tagged, the data can just be left in there. | 469 | * the cache is physically tagged, the data can just be left in there. |
378 | */ | 470 | */ |
379 | if (n_aliases == 0) | 471 | if (cpu_data->dcache.n_aliases == 0) |
380 | return; | 472 | return; |
381 | 473 | ||
382 | all_aliases_mask = (1 << n_aliases) - 1; | ||
383 | |||
384 | /* | 474 | /* |
385 | * Don't bother with the lookup and alias check if we have a | 475 | * Don't bother with the lookup and alias check if we have a |
386 | * wide range to cover, just blow away the dcache in its | 476 | * wide range to cover, just blow away the dcache in its |
387 | * entirety instead. -- PFM. | 477 | * entirety instead. -- PFM. |
388 | */ | 478 | */ |
389 | if (((end - start) >> PAGE_SHIFT) >= 64) { | 479 | if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) |
390 | flush_dcache_all(); | 480 | flush_dcache_all(); |
391 | 481 | else | |
392 | if (vma->vm_flags & VM_EXEC) | 482 | __flush_cache_mm(vma->vm_mm, start, end); |
393 | flush_icache_all(); | ||
394 | |||
395 | return; | ||
396 | } | ||
397 | |||
398 | dir = pgd_offset(vma->vm_mm, p); | ||
399 | pud = pud_offset(dir, p); | ||
400 | pmd = pmd_offset(pud, p); | ||
401 | end = PAGE_ALIGN(end); | ||
402 | |||
403 | do { | ||
404 | if (pmd_none(*pmd) || pmd_bad(*pmd)) { | ||
405 | p &= ~((1 << PMD_SHIFT) - 1); | ||
406 | p += (1 << PMD_SHIFT); | ||
407 | pmd++; | ||
408 | |||
409 | continue; | ||
410 | } | ||
411 | |||
412 | pte = pte_offset_kernel(pmd, p); | ||
413 | |||
414 | do { | ||
415 | entry = *pte; | ||
416 | |||
417 | if ((pte_val(entry) & _PAGE_PRESENT)) { | ||
418 | phys = pte_val(entry) & PTE_PHYS_MASK; | ||
419 | |||
420 | if ((p ^ phys) & alias_mask) { | ||
421 | d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); | ||
422 | d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); | ||
423 | |||
424 | if (d == all_aliases_mask) | ||
425 | goto loop_exit; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | pte++; | ||
430 | p += PAGE_SIZE; | ||
431 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | ||
432 | pmd++; | ||
433 | } while (p < end); | ||
434 | |||
435 | loop_exit: | ||
436 | for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases; | ||
437 | i++, select_bit <<= 1, addr_offset += PAGE_SIZE) | ||
438 | if (d & select_bit) { | ||
439 | (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); | ||
440 | wmb(); | ||
441 | } | ||
442 | 483 | ||
443 | if (vma->vm_flags & VM_EXEC) { | 484 | if (vma->vm_flags & VM_EXEC) { |
444 | /* | 485 | /* |
@@ -731,4 +772,3 @@ static void __flush_dcache_segment_4way(unsigned long start, | |||
731 | a3 += linesz; | 772 | a3 += linesz; |
732 | } while (a0 < a0e); | 773 | } while (a0 < a0e); |
733 | } | 774 | } |
734 | |||