aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/mach-omap2/sleep34xx.S
diff options
context:
space:
mode:
author Richard Woodruff <r-woodruff2@ti.com> 2010-12-20 15:05:03 -0500
committer Kevin Hilman <khilman@deeprootsystems.com> 2010-12-21 17:45:47 -0500
commit 0bd40535365c318e331f5e872030a710d5746167 (patch)
tree e08c1d72b3eac6e260f2d8841c465d154a03c2af /arch/arm/mach-omap2/sleep34xx.S
parent 1cbbe37ac5c78fb59ce02f639d6c4f69b610cf5e (diff)
OMAP3: PM: Update clean_l2 to use v7_flush_dcache_all
Analysis in the TI kernel with ETM showed that, by using the cache-mapped flush in the kernel instead of the SO-mapped flush, the cost drops by 65% (3.39 ms down to 1.17 ms) for clean_l2, which is used during sleep sequences.

Overall:
- speed up
- unfortunately there isn't a good alternative flush method today
- code reduction, less maintenance, and fewer potential bugs in unmaintained code

This also fixes the bug with the clean_l2 function usage.

Reported-by: Tony Lindgren <tony@atomide.com>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Tony Lindgren <tony@atomide.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
[nm@ti.com: ported rkw's proposal to 2.6.37-rc2]
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Diffstat (limited to 'arch/arm/mach-omap2/sleep34xx.S')
-rw-r--r-- arch/arm/mach-omap2/sleep34xx.S 80
1 files changed, 14 insertions, 66 deletions
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index 2fb205a7f28..aa43da5176e 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -520,72 +520,18 @@ clean_caches:
520 cmp r9, #1 /* Check whether L2 inval is required or not*/ 520 cmp r9, #1 /* Check whether L2 inval is required or not*/
521 bne skip_l2_inval 521 bne skip_l2_inval
522clean_l2: 522clean_l2:
523 /* read clidr */ 523 /*
524 mrc p15, 1, r0, c0, c0, 1 524 * Jump out to kernel flush routine
525 /* extract loc from clidr */ 525 * - reuse that code is better
526 ands r3, r0, #0x7000000 526 * - it executes in a cached space so is faster than refetch per-block
527 /* left align loc bit field */ 527 * - should be faster and will change with kernel
528 mov r3, r3, lsr #23 528 * - 'might' have to copy address, load and jump to it
529 /* if loc is 0, then no need to clean */ 529 * - lr is used since we are running in SRAM currently.
530 beq finished 530 */
531 /* start clean at cache level 0 */ 531 ldr r1, kernel_flush
532 mov r10, #0 532 mov lr, pc
533loop1: 533 bx r1
534 /* work out 3x current cache level */ 534
535 add r2, r10, r10, lsr #1
536 /* extract cache type bits from clidr*/
537 mov r1, r0, lsr r2
538 /* mask of the bits for current cache only */
539 and r1, r1, #7
540 /* see what cache we have at this level */
541 cmp r1, #2
542 /* skip if no cache, or just i-cache */
543 blt skip
544 /* select current cache level in cssr */
545 mcr p15, 2, r10, c0, c0, 0
546 /* isb to sych the new cssr&csidr */
547 isb
548 /* read the new csidr */
549 mrc p15, 1, r1, c0, c0, 0
550 /* extract the length of the cache lines */
551 and r2, r1, #7
552 /* add 4 (line length offset) */
553 add r2, r2, #4
554 ldr r4, assoc_mask
555 /* find maximum number on the way size */
556 ands r4, r4, r1, lsr #3
557 /* find bit position of way size increment */
558 clz r5, r4
559 ldr r7, numset_mask
560 /* extract max number of the index size*/
561 ands r7, r7, r1, lsr #13
562loop2:
563 mov r9, r4
564 /* create working copy of max way size*/
565loop3:
566 /* factor way and cache number into r11 */
567 orr r11, r10, r9, lsl r5
568 /* factor index number into r11 */
569 orr r11, r11, r7, lsl r2
570 /*clean & invalidate by set/way */
571 mcr p15, 0, r11, c7, c10, 2
572 /* decrement the way*/
573 subs r9, r9, #1
574 bge loop3
575 /*decrement the index */
576 subs r7, r7, #1
577 bge loop2
578skip:
579 add r10, r10, #2
580 /* increment cache number */
581 cmp r3, r10
582 bgt loop1
583finished:
584 /*swith back to cache level 0 */
585 mov r10, #0
586 /* select current cache level in cssr */
587 mcr p15, 2, r10, c0, c0, 0
588 isb
589skip_l2_inval: 535skip_l2_inval:
590 /* Data memory barrier and Data sync barrier */ 536 /* Data memory barrier and Data sync barrier */
591 mov r1, #0 537 mov r1, #0
@@ -668,5 +614,7 @@ cache_pred_disable_mask:
668 .word 0xFFFFE7FB 614 .word 0xFFFFE7FB
669control_stat: 615control_stat:
670 .word CONTROL_STAT 616 .word CONTROL_STAT
617kernel_flush:
618 .word v7_flush_dcache_all
671ENTRY(omap34xx_cpu_suspend_sz) 619ENTRY(omap34xx_cpu_suspend_sz)
672 .word . - omap34xx_cpu_suspend 620 .word . - omap34xx_cpu_suspend