author     Paul Mundt <lethal@linux-sh.org>   2009-09-09 01:22:15 -0400
committer  Paul Mundt <lethal@linux-sh.org>   2009-09-09 01:22:15 -0400
commit     bd6df57481b329dfeeb4889068848ee4f4761561
tree       df507076dc2aac4240459d6e600f3f0c87dec9f9 /arch/sh/mm
parent     31c9efde786252112cc3d04a1ed3513b6ec63a7b

sh: Kill off segment-based d-cache flushing on SH-4.
This kills off the unrolled segment based flushers on SH-4 and switches
over to a generic unrolled approach derived from the writethrough segment
flusher.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/mm')
-rw-r--r--   arch/sh/mm/cache-sh4.c   291
1 file changed, 20 insertions, 271 deletions
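Before the diff itself: the new flush_dcache_all() (second hunk below) walks the SH-4 operand-cache (OC) address array directly instead of dispatching to a per-way flusher through a function pointer. The following is an illustrative sketch only, not part of the commit, showing how the walk is bounded. The cache geometry (32-byte lines, 512 sets, 1 way) is an assumed example; the kernel reads the real values from current_cpu_data.dcache, and the address-array base is taken from the SH-4 P4-area memory map.

/*
 * Illustration only: bound arithmetic used by the new flush_dcache_all().
 * Geometry below is assumed (entry_shift = 5, 512 sets, 1 way).
 */
#include <assert.h>

#define CACHE_OC_ADDRESS_ARRAY  0xf4000000UL   /* SH-4 OC address array, P4 area */

int main(void)
{
        unsigned long sets = 512, entry_shift = 5, ways = 1;

        unsigned long entry_offset = 1UL << entry_shift;        /* 32 bytes per entry */
        unsigned long end_addr = CACHE_OC_ADDRESS_ARRAY +
                                 (sets << entry_shift) * ways;  /* base + 16 KiB here */

        assert(entry_offset == 32);
        assert(end_addr - CACHE_OC_ADDRESS_ARRAY == 16 * 1024);

        /*
         * flush_dcache_all() stores 0 to every entry_offset-sized slot in
         * [CACHE_OC_ADDRESS_ARRAY, end_addr); the eight __raw_writel()
         * calls per iteration are simply a manual 8x unroll of that walk,
         * the same store-zero-to-the-address-array trick the old
         * write-through segment flusher used per way.
         */
        return 0;
}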
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index f0999606686f..92b7d947db94 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -30,14 +30,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
                                unsigned long exec_offset);
 
 /*
- * This is initialised here to ensure that it is not placed in the BSS. If
- * that were to happen, note that cache_init gets called before the BSS is
- * cleared, so this would get nulled out which would be hopeless.
- */
-static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
-        (void (*)(unsigned long, unsigned long))0xdeadbeef;
-
-/*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
@@ -158,10 +150,27 @@ static void __uses_jump_to_uncached flush_icache_all(void)
         local_irq_restore(flags);
 }
 
-static inline void flush_dcache_all(void)
+static void flush_dcache_all(void)
 {
-        (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
-        wmb();
+        unsigned long addr, end_addr, entry_offset;
+
+        end_addr = CACHE_OC_ADDRESS_ARRAY +
+                (current_cpu_data.dcache.sets <<
+                 current_cpu_data.dcache.entry_shift) *
+                        current_cpu_data.dcache.ways;
+
+        entry_offset = 1 << current_cpu_data.dcache.entry_shift;
+
+        for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+                __raw_writel(0, addr); addr += entry_offset;
+        }
 }
 
 static void sh4_flush_cache_all(void *unused)
@@ -347,245 +356,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
         } while (--way_count != 0);
 }
 
-/*
- * Break the 1, 2 and 4 way variants of this out into separate functions to
- * avoid nearly all the overhead of having the conditional stuff in the function
- * bodies (+ the 1 and 2 way cases avoid saving any registers too).
- *
- * We want to eliminate unnecessary bus transactions, so this code uses
- * a non-obvious technique.
- *
- * Loop over a cache way sized block of, one cache line at a time. For each
- * line, use movca.a to cause the current cache line contents to be written
- * back, but without reading anything from main memory. However this has the
- * side effect that the cache is now caching that memory location. So follow
- * this with a cache invalidate to mark the cache line invalid. And do all
- * this with interrupts disabled, to avoid the cache line being accidently
- * evicted while it is holding garbage.
- *
- * This also breaks in a number of circumstances:
- * - if there are modifications to the region of memory just above
- *   empty_zero_page (for example because a breakpoint has been placed
- *   there), then these can be lost.
- *
- *   This is because the the memory address which the cache temporarily
- *   caches in the above description is empty_zero_page. So the
- *   movca.l hits the cache (it is assumed that it misses, or at least
- *   isn't dirty), modifies the line and then invalidates it, losing the
- *   required change.
- *
- * - If caches are disabled or configured in write-through mode, then
- *   the movca.l writes garbage directly into memory.
- */
-static void __flush_dcache_segment_writethrough(unsigned long start,
-                                                unsigned long extent_per_way)
-{
-        unsigned long addr;
-        int i;
-
-        addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);
-
-        while (extent_per_way) {
-                for (i = 0; i < cpu_data->dcache.ways; i++)
-                        __raw_writel(0, addr + cpu_data->dcache.way_incr * i);
-
-                addr += cpu_data->dcache.linesz;
-                extent_per_way -= cpu_data->dcache.linesz;
-        }
-}
-
-static void __flush_dcache_segment_1way(unsigned long start,
-                                        unsigned long extent_per_way)
-{
-        unsigned long orig_sr, sr_with_bl;
-        unsigned long base_addr;
-        unsigned long way_incr, linesz, way_size;
-        struct cache_info *dcache;
-        register unsigned long a0, a0e;
-
-        asm volatile("stc sr, %0" : "=r" (orig_sr));
-        sr_with_bl = orig_sr | (1<<28);
-        base_addr = ((unsigned long)&empty_zero_page[0]);
-
-        /*
-         * The previous code aligned base_addr to 16k, i.e. the way_size of all
-         * existing SH-4 D-caches. Whilst I don't see a need to have this
-         * aligned to any better than the cache line size (which it will be
-         * anyway by construction), let's align it to at least the way_size of
-         * any existing or conceivable SH-4 D-cache. -- RPC
-         */
-        base_addr = ((base_addr >> 16) << 16);
-        base_addr |= start;
-
-        dcache = &boot_cpu_data.dcache;
-        linesz = dcache->linesz;
-        way_incr = dcache->way_incr;
-        way_size = dcache->way_size;
-
-        a0 = base_addr;
-        a0e = base_addr + extent_per_way;
-        do {
-                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-                asm volatile("movca.l r0, @%0\n\t"
-                             "ocbi @%0" : : "r" (a0));
-                a0 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "ocbi @%0" : : "r" (a0));
-                a0 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "ocbi @%0" : : "r" (a0));
-                a0 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "ocbi @%0" : : "r" (a0));
-                asm volatile("ldc %0, sr" : : "r" (orig_sr));
-                a0 += linesz;
-        } while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_2way(unsigned long start,
-                                        unsigned long extent_per_way)
-{
-        unsigned long orig_sr, sr_with_bl;
-        unsigned long base_addr;
-        unsigned long way_incr, linesz, way_size;
-        struct cache_info *dcache;
-        register unsigned long a0, a1, a0e;
-
-        asm volatile("stc sr, %0" : "=r" (orig_sr));
-        sr_with_bl = orig_sr | (1<<28);
-        base_addr = ((unsigned long)&empty_zero_page[0]);
-
-        /* See comment under 1-way above */
-        base_addr = ((base_addr >> 16) << 16);
-        base_addr |= start;
-
-        dcache = &boot_cpu_data.dcache;
-        linesz = dcache->linesz;
-        way_incr = dcache->way_incr;
-        way_size = dcache->way_size;
-
-        a0 = base_addr;
-        a1 = a0 + way_incr;
-        a0e = base_addr + extent_per_way;
-        do {
-                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1" : :
-                             "r" (a0), "r" (a1));
-                a0 += linesz;
-                a1 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1" : :
-                             "r" (a0), "r" (a1));
-                a0 += linesz;
-                a1 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1" : :
-                             "r" (a0), "r" (a1));
-                a0 += linesz;
-                a1 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1" : :
-                             "r" (a0), "r" (a1));
-                asm volatile("ldc %0, sr" : : "r" (orig_sr));
-                a0 += linesz;
-                a1 += linesz;
-        } while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_4way(unsigned long start,
-                                        unsigned long extent_per_way)
-{
-        unsigned long orig_sr, sr_with_bl;
-        unsigned long base_addr;
-        unsigned long way_incr, linesz, way_size;
-        struct cache_info *dcache;
-        register unsigned long a0, a1, a2, a3, a0e;
-
-        asm volatile("stc sr, %0" : "=r" (orig_sr));
-        sr_with_bl = orig_sr | (1<<28);
-        base_addr = ((unsigned long)&empty_zero_page[0]);
-
-        /* See comment under 1-way above */
-        base_addr = ((base_addr >> 16) << 16);
-        base_addr |= start;
-
-        dcache = &boot_cpu_data.dcache;
-        linesz = dcache->linesz;
-        way_incr = dcache->way_incr;
-        way_size = dcache->way_size;
-
-        a0 = base_addr;
-        a1 = a0 + way_incr;
-        a2 = a1 + way_incr;
-        a3 = a2 + way_incr;
-        a0e = base_addr + extent_per_way;
-        do {
-                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "movca.l r0, @%2\n\t"
-                             "movca.l r0, @%3\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1\n\t"
-                             "ocbi @%2\n\t"
-                             "ocbi @%3\n\t" : :
-                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-                a0 += linesz;
-                a1 += linesz;
-                a2 += linesz;
-                a3 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "movca.l r0, @%2\n\t"
-                             "movca.l r0, @%3\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1\n\t"
-                             "ocbi @%2\n\t"
-                             "ocbi @%3\n\t" : :
-                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-                a0 += linesz;
-                a1 += linesz;
-                a2 += linesz;
-                a3 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "movca.l r0, @%2\n\t"
-                             "movca.l r0, @%3\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1\n\t"
-                             "ocbi @%2\n\t"
-                             "ocbi @%3\n\t" : :
-                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-                a0 += linesz;
-                a1 += linesz;
-                a2 += linesz;
-                a3 += linesz;
-                asm volatile("movca.l r0, @%0\n\t"
-                             "movca.l r0, @%1\n\t"
-                             "movca.l r0, @%2\n\t"
-                             "movca.l r0, @%3\n\t"
-                             "ocbi @%0\n\t"
-                             "ocbi @%1\n\t"
-                             "ocbi @%2\n\t"
-                             "ocbi @%3\n\t" : :
-                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-                asm volatile("ldc %0, sr" : : "r" (orig_sr));
-                a0 += linesz;
-                a1 += linesz;
-                a2 += linesz;
-                a3 += linesz;
-        } while (a0 < a0e);
-}
-
 extern void __weak sh4__flush_region_init(void);
 
 /*
@@ -593,32 +363,11 @@ extern void __weak sh4__flush_region_init(void);
  */
 void __init sh4_cache_init(void)
 {
-        unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
-
         printk("PVR=%08x CVR=%08x PRR=%08x\n",
                 ctrl_inl(CCN_PVR),
                 ctrl_inl(CCN_CVR),
                 ctrl_inl(CCN_PRR));
 
-        if (wt_enabled)
-                __flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
-        else {
-                switch (boot_cpu_data.dcache.ways) {
-                case 1:
-                        __flush_dcache_segment_fn = __flush_dcache_segment_1way;
-                        break;
-                case 2:
-                        __flush_dcache_segment_fn = __flush_dcache_segment_2way;
-                        break;
-                case 4:
-                        __flush_dcache_segment_fn = __flush_dcache_segment_4way;
-                        break;
-                default:
-                        panic("unknown number of cache ways\n");
-                        break;
-                }
-        }
-
         local_flush_icache_range = sh4_flush_icache_range;
         local_flush_dcache_page = sh4_flush_dcache_page;
         local_flush_cache_all = sh4_flush_cache_all;
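The long comment deleted in the third hunk describes the movca.l/ocbi trick that the removed 1-, 2- and 4-way flushers relied on. For readers unfamiliar with it, here is a minimal sketch of that sequence reduced to a single cache line. It is derived from the deleted code, is illustration only (not something this commit adds), and assumes addr is an address inside the aligned empty_zero_page neighbourhood that indexes the line to be flushed, running in privileged mode on SH-4.

/*
 * Sketch only: the movca.l/ocbi flush of one D-cache line, as used by the
 * deleted per-way flushers. SR.BL is set so nothing can observe or evict
 * the line while it briefly holds garbage.
 */
static inline void flush_one_line_via_movca(unsigned long addr)
{
        unsigned long orig_sr, sr_with_bl;

        __asm__ __volatile__("stc sr, %0" : "=r" (orig_sr));
        sr_with_bl = orig_sr | (1 << 28);               /* set SR.BL */

        __asm__ __volatile__("ldc %0, sr" : : "r" (sr_with_bl));
        __asm__ __volatile__(
                "movca.l r0, @%0\n\t"   /* allocate the line for addr without reading
                                           memory, which writes back whatever dirty
                                           data previously occupied that cache line */
                "ocbi    @%0"           /* then invalidate the garbage just allocated */
                : : "r" (addr));
        __asm__ __volatile__("ldc %0, sr" : : "r" (orig_sr));
}

As the deleted comment notes, this only works for a write-back cache; in write-through mode the movca.l stores garbage straight to memory, which is why the write-through path already used plain address-array writes, the approach this commit generalizes.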