diff options
author | Richard Woodruff <r-woodruff2@ti.com> | 2010-12-20 15:05:03 -0500 |
---|---|---|
committer | Kevin Hilman <khilman@deeprootsystems.com> | 2010-12-21 17:45:47 -0500 |
commit | 0bd40535365c318e331f5e872030a710d5746167 (patch) | |
tree | e08c1d72b3eac6e260f2d8841c465d154a03c2af /arch/arm/mach-omap2/sleep34xx.S | |
parent | 1cbbe37ac5c78fb59ce02f639d6c4f69b610cf5e (diff) |
OMAP3: PM: Update clean_l2 to use v7_flush_dcache_all
Analysis in the TI kernel with ETM showed that using the cache-mapped flush
in the kernel instead of the SO-mapped flush drops the cost by 65% (3.39 ms
down to 1.17 ms) for clean_l2, which is used during sleep sequences.
Overall:
- speed up
- unfortunately there isn't a good alternative flush method today
- code reduction, less maintenance, and fewer potential bugs in
unmaintained code
This also fixes the bug with the clean_l2 function usage.
Reported-by: Tony Lindgren <tony@atomide.com>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Tony Lindgren <tony@atomide.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
[nm@ti.com: ported rkw's proposal to 2.6.37-rc2]
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Diffstat (limited to 'arch/arm/mach-omap2/sleep34xx.S')
-rw-r--r-- | arch/arm/mach-omap2/sleep34xx.S | 80 |
1 files changed, 14 insertions, 66 deletions
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index 2fb205a7f285..aa43da5176eb 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S | |||
@@ -520,72 +520,18 @@ clean_caches: | |||
520 | cmp r9, #1 /* Check whether L2 inval is required or not*/ | 520 | cmp r9, #1 /* Check whether L2 inval is required or not*/ |
521 | bne skip_l2_inval | 521 | bne skip_l2_inval |
522 | clean_l2: | 522 | clean_l2: |
523 | /* read clidr */ | 523 | /* |
524 | mrc p15, 1, r0, c0, c0, 1 | 524 | * Jump out to kernel flush routine |
525 | /* extract loc from clidr */ | 525 | * - reuse that code is better |
526 | ands r3, r0, #0x7000000 | 526 | * - it executes in a cached space so is faster than refetch per-block |
527 | /* left align loc bit field */ | 527 | * - should be faster and will change with kernel |
528 | mov r3, r3, lsr #23 | 528 | * - 'might' have to copy address, load and jump to it |
529 | /* if loc is 0, then no need to clean */ | 529 | * - lr is used since we are running in SRAM currently. |
530 | beq finished | 530 | */ |
531 | /* start clean at cache level 0 */ | 531 | ldr r1, kernel_flush |
532 | mov r10, #0 | 532 | mov lr, pc |
533 | loop1: | 533 | bx r1 |
534 | /* work out 3x current cache level */ | 534 | |
535 | add r2, r10, r10, lsr #1 | ||
536 | /* extract cache type bits from clidr*/ | ||
537 | mov r1, r0, lsr r2 | ||
538 | /* mask of the bits for current cache only */ | ||
539 | and r1, r1, #7 | ||
540 | /* see what cache we have at this level */ | ||
541 | cmp r1, #2 | ||
542 | /* skip if no cache, or just i-cache */ | ||
543 | blt skip | ||
544 | /* select current cache level in cssr */ | ||
545 | mcr p15, 2, r10, c0, c0, 0 | ||
546 | /* isb to sych the new cssr&csidr */ | ||
547 | isb | ||
548 | /* read the new csidr */ | ||
549 | mrc p15, 1, r1, c0, c0, 0 | ||
550 | /* extract the length of the cache lines */ | ||
551 | and r2, r1, #7 | ||
552 | /* add 4 (line length offset) */ | ||
553 | add r2, r2, #4 | ||
554 | ldr r4, assoc_mask | ||
555 | /* find maximum number on the way size */ | ||
556 | ands r4, r4, r1, lsr #3 | ||
557 | /* find bit position of way size increment */ | ||
558 | clz r5, r4 | ||
559 | ldr r7, numset_mask | ||
560 | /* extract max number of the index size*/ | ||
561 | ands r7, r7, r1, lsr #13 | ||
562 | loop2: | ||
563 | mov r9, r4 | ||
564 | /* create working copy of max way size*/ | ||
565 | loop3: | ||
566 | /* factor way and cache number into r11 */ | ||
567 | orr r11, r10, r9, lsl r5 | ||
568 | /* factor index number into r11 */ | ||
569 | orr r11, r11, r7, lsl r2 | ||
570 | /*clean & invalidate by set/way */ | ||
571 | mcr p15, 0, r11, c7, c10, 2 | ||
572 | /* decrement the way*/ | ||
573 | subs r9, r9, #1 | ||
574 | bge loop3 | ||
575 | /*decrement the index */ | ||
576 | subs r7, r7, #1 | ||
577 | bge loop2 | ||
578 | skip: | ||
579 | add r10, r10, #2 | ||
580 | /* increment cache number */ | ||
581 | cmp r3, r10 | ||
582 | bgt loop1 | ||
583 | finished: | ||
584 | /*swith back to cache level 0 */ | ||
585 | mov r10, #0 | ||
586 | /* select current cache level in cssr */ | ||
587 | mcr p15, 2, r10, c0, c0, 0 | ||
588 | isb | ||
589 | skip_l2_inval: | 535 | skip_l2_inval: |
590 | /* Data memory barrier and Data sync barrier */ | 536 | /* Data memory barrier and Data sync barrier */ |
591 | mov r1, #0 | 537 | mov r1, #0 |
@@ -668,5 +614,7 @@ cache_pred_disable_mask: | |||
668 | .word 0xFFFFE7FB | 614 | .word 0xFFFFE7FB |
669 | control_stat: | 615 | control_stat: |
670 | .word CONTROL_STAT | 616 | .word CONTROL_STAT |
617 | kernel_flush: | ||
618 | .word v7_flush_dcache_all | ||
671 | ENTRY(omap34xx_cpu_suspend_sz) | 619 | ENTRY(omap34xx_cpu_suspend_sz) |
672 | .word . - omap34xx_cpu_suspend | 620 | .word . - omap34xx_cpu_suspend |