author		Paul Mundt <lethal@linux-sh.org>	2006-09-27 05:30:07 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2006-09-27 05:30:07 -0400
commit		28ccf7f91b1ac42ee1f18480a69d2a7486b625ce
tree		456f8287e15c59e8a1f98a49932c4ecf85419227 /arch
parent		d15f456043175bdf3464514b92a825b88d0546ae
sh: Selective flush_cache_mm() flushing.

flush_cache_mm() wraps into flush_cache_all(), which is rather excessive
given that the number of PTEs within the specified context is generally
quite low. Optimize by walking the mm's VMA list and selectively flushing
the VMA ranges from the dcache. Invalidate the icache only if a VMA sets
VM_EXEC.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
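For quick orientation, here is the new flush_cache_mm() decision logic in condensed form. This is a sketch distilled from the patch below (the page-table walk lives in the new __flush_cache_mm() helper and is elided here), not a substitute for reading the diff itself.

/*
 * Condensed sketch of the strategy this patch introduces; see the full
 * diff below for the real __flush_cache_mm() page table walk.
 */
void flush_cache_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	/* Physically tagged cache with no aliases: nothing to flush. */
	if (cpu_data->dcache.n_aliases == 0)
		return;

	if (mm->nr_ptes >= MAX_DCACHE_PAGES) {
		/* Too many PTEs to be worth walking, flush the dcache wholesale. */
		flush_dcache_all();
	} else {
		/* Otherwise flush only the ranges covered by this mm's VMAs. */
		for (vma = mm->mmap; vma; vma = vma->vm_next)
			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
	}

	/* Invalidate the icache only if some VMA in the mm is executable. */
	if (mm->exec_vm)
		flush_icache_all();
}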
Diffstat (limited to 'arch')
-rw-r--r--	arch/sh/mm/cache-sh4.c	220
1 file changed, 130 insertions(+), 90 deletions(-)
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 2203bd6aadb3..aa4f62f0e374 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,29 +2,31 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001, 2002, 2003, 2004, 2005 Paul Mundt
+ * Copyright (C) 2001 - 2006 Paul Mundt
  * Copyright (C) 2003 Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-
 #include <linux/init.h>
-#include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/threads.h>
 #include <asm/addrspace.h>
-#include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
+/*
+ * The maximum number of pages we support up to when doing ranged dcache
+ * flushing. Anything exceeding this will simply flush the dcache in its
+ * entirety.
+ */
+#define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
+
 static void __flush_dcache_segment_1way(unsigned long start,
 					unsigned long extent);
 static void __flush_dcache_segment_2way(unsigned long start,
@@ -219,14 +221,14 @@ void flush_cache_sigtramp(unsigned long addr)
 static inline void flush_cache_4096(unsigned long start,
 				    unsigned long phys)
 {
-	unsigned long flags;
-
 	/*
 	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
 	 * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
 	 */
-	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
-	    || start < CACHE_OC_ADDRESS_ARRAY) {
+	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) ||
+	    (start < CACHE_OC_ADDRESS_ARRAY)) {
+		unsigned long flags;
+
 		local_irq_save(flags);
 		__flush_cache_4096(start | SH_CACHE_ASSOC,
 				   P1SEGADDR(phys), 0x20000000);
@@ -257,6 +259,7 @@ void flush_dcache_page(struct page *page)
 	wmb();
 }
 
+/* TODO: Selective icache invalidation through IC address array.. */
 static inline void flush_icache_all(void)
 {
 	unsigned long flags, ccr;
@@ -290,19 +293,121 @@ void flush_cache_all(void)
 	flush_icache_all();
 }
 
+static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
+			     unsigned long end)
+{
+	unsigned long d = 0, p = start & PAGE_MASK;
+	unsigned long alias_mask = cpu_data->dcache.alias_mask;
+	unsigned long n_aliases = cpu_data->dcache.n_aliases;
+	unsigned long select_bit;
+	unsigned long all_aliases_mask;
+	unsigned long addr_offset;
+	pgd_t *dir;
+	pmd_t *pmd;
+	pud_t *pud;
+	pte_t *pte;
+	int i;
+
+	dir = pgd_offset(mm, p);
+	pud = pud_offset(dir, p);
+	pmd = pmd_offset(pud, p);
+	end = PAGE_ALIGN(end);
+
+	all_aliases_mask = (1 << n_aliases) - 1;
+
+	do {
+		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
+			p &= PMD_MASK;
+			p += PMD_SIZE;
+			pmd++;
+
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, p);
+
+		do {
+			unsigned long phys;
+			pte_t entry = *pte;
+
+			if (!(pte_val(entry) & _PAGE_PRESENT)) {
+				pte++;
+				p += PAGE_SIZE;
+				continue;
+			}
+
+			phys = pte_val(entry) & PTE_PHYS_MASK;
+
+			if ((p ^ phys) & alias_mask) {
+				d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
+				d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
+
+				if (d == all_aliases_mask)
+					goto loop_exit;
+			}
+
+			pte++;
+			p += PAGE_SIZE;
+		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
+		pmd++;
+	} while (p < end);
+
+loop_exit:
+	addr_offset = 0;
+	select_bit = 1;
+
+	for (i = 0; i < n_aliases; i++) {
+		if (d & select_bit) {
+			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
+			wmb();
+		}
+
+		select_bit <<= 1;
+		addr_offset += PAGE_SIZE;
+	}
+}
+
+/*
+ * Note : (RPC) since the caches are physically tagged, the only point
+ * of flush_cache_mm for SH-4 is to get rid of aliases from the
+ * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
+ * lines can stay resident so long as the virtual address they were
+ * accessed with (hence cache set) is in accord with the physical
+ * address (i.e. tag). It's no different here. So I reckon we don't
+ * need to flush the I-cache, since aliases don't matter for that. We
+ * should try that.
+ *
+ * Caller takes mm->mmap_sem.
+ */
 void flush_cache_mm(struct mm_struct *mm)
 {
 	/*
-	 * Note : (RPC) since the caches are physically tagged, the only point
-	 * of flush_cache_mm for SH-4 is to get rid of aliases from the
-	 * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
-	 * lines can stay resident so long as the virtual address they were
-	 * accessed with (hence cache set) is in accord with the physical
-	 * address (i.e. tag). It's no different here. So I reckon we don't
-	 * need to flush the I-cache, since aliases don't matter for that. We
-	 * should try that.
+	 * If cache is only 4k-per-way, there are never any 'aliases'. Since
+	 * the cache is physically tagged, the data can just be left in there.
+	 */
+	if (cpu_data->dcache.n_aliases == 0)
+		return;
+
+	/*
+	 * Don't bother groveling around the dcache for the VMA ranges
+	 * if there are too many PTEs to make it worthwhile.
 	 */
-	flush_cache_all();
+	if (mm->nr_ptes >= MAX_DCACHE_PAGES)
+		flush_dcache_all();
+	else {
+		struct vm_area_struct *vma;
+
+		/*
+		 * In this case there are reasonably sized ranges to flush,
+		 * iterate through the VMA list and take care of any aliases.
+		 */
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
+	}
+
+	/* Only touch the icache if one of the VMAs has VM_EXEC set. */
+	if (mm->exec_vm)
+		flush_icache_all();
 }
 
 /*
@@ -311,7 +416,8 @@ void flush_cache_mm(struct mm_struct *mm)
  * ADDR: Virtual Address (U0 address)
  * PFN: Physical page number
  */
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
+		      unsigned long pfn)
 {
 	unsigned long phys = pfn << PAGE_SHIFT;
 	unsigned int alias_mask;
@@ -358,87 +464,22 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigne
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end)
 {
-	unsigned long d = 0, p = start & PAGE_MASK;
-	unsigned long alias_mask = cpu_data->dcache.alias_mask;
-	unsigned long n_aliases = cpu_data->dcache.n_aliases;
-	unsigned long select_bit;
-	unsigned long all_aliases_mask;
-	unsigned long addr_offset;
-	unsigned long phys;
-	pgd_t *dir;
-	pmd_t *pmd;
-	pud_t *pud;
-	pte_t *pte;
-	pte_t entry;
-	int i;
-
 	/*
 	 * If cache is only 4k-per-way, there are never any 'aliases'. Since
 	 * the cache is physically tagged, the data can just be left in there.
 	 */
-	if (n_aliases == 0)
+	if (cpu_data->dcache.n_aliases == 0)
 		return;
 
-	all_aliases_mask = (1 << n_aliases) - 1;
-
 	/*
 	 * Don't bother with the lookup and alias check if we have a
 	 * wide range to cover, just blow away the dcache in its
 	 * entirety instead. -- PFM.
 	 */
-	if (((end - start) >> PAGE_SHIFT) >= 64) {
+	if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
 		flush_dcache_all();
-
-		if (vma->vm_flags & VM_EXEC)
-			flush_icache_all();
-
-		return;
-	}
-
-	dir = pgd_offset(vma->vm_mm, p);
-	pud = pud_offset(dir, p);
-	pmd = pmd_offset(pud, p);
-	end = PAGE_ALIGN(end);
-
-	do {
-		if (pmd_none(*pmd) || pmd_bad(*pmd)) {
-			p &= ~((1 << PMD_SHIFT) - 1);
-			p += (1 << PMD_SHIFT);
-			pmd++;
-
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, p);
-
-		do {
-			entry = *pte;
-
-			if ((pte_val(entry) & _PAGE_PRESENT)) {
-				phys = pte_val(entry) & PTE_PHYS_MASK;
-
-				if ((p ^ phys) & alias_mask) {
-					d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-					d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-					if (d == all_aliases_mask)
-						goto loop_exit;
-				}
-			}
-
-			pte++;
-			p += PAGE_SIZE;
-		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-		pmd++;
-	} while (p < end);
-
-loop_exit:
-	for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases;
-	     i++, select_bit <<= 1, addr_offset += PAGE_SIZE)
-		if (d & select_bit) {
-			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-			wmb();
-		}
+	else
+		__flush_cache_mm(vma->vm_mm, start, end);
 
 	if (vma->vm_flags & VM_EXEC) {
 		/*
@@ -731,4 +772,3 @@ static void __flush_dcache_segment_4way(unsigned long start,
 		a3 += linesz;
 	} while (a0 < a0e);
 }
-