about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author Nicolas Pitre <nico@cam.org> 2008-04-23 20:04:54 -0400
committer Lennert Buytenhek <buytenh@marvell.com> 2008-06-22 16:44:58 -0400
commit 6c386e58aadb90fb5d8b5be979e02d74f8be52fe (patch)
tree 89ddc09277ad4191aa5bfd12db20302544ec2294 /arch
parent 79e90dd5aa95adfdc3117db8a559da3d0195ba58 (diff)
[ARM] Feroceon: speed up flushing of the entire cache
Flushing the L1 D cache with a test/clean/invalidate loop is very easy in software, but it is not the quickest way of doing it, as there is a lot of overhead involved in re-scanning the cache from the beginning every time we hit a dirty line.

This patch makes proc-feroceon.S use "clean+invalidate by set/way" loops according to possible cache configuration of Feroceon CPUs (either direct-mapped or 4-way set associative).

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm/mm/proc-feroceon.S 59
1 files changed, 48 insertions, 11 deletions
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 12b46d7b7f5e..00eadb5995c7 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -44,11 +44,31 @@
44 */ 44 */
45#define CACHE_DLINESIZE 32 45#define CACHE_DLINESIZE 32
46 46
47 .bss
48 .align 3
49__cache_params_loc:
50 .space 8
51
47 .text 52 .text
53__cache_params:
54 .word __cache_params_loc
55
48/* 56/*
49 * cpu_feroceon_proc_init() 57 * cpu_feroceon_proc_init()
50 */ 58 */
51ENTRY(cpu_feroceon_proc_init) 59ENTRY(cpu_feroceon_proc_init)
60 mrc p15, 0, r0, c0, c0, 1 @ read cache type register
61 ldr r1, __cache_params
62 mov r2, #(16 << 5)
63 tst r0, #(1 << 16) @ get way
64 mov r0, r0, lsr #18 @ get cache size order
65 movne r3, #((4 - 1) << 30) @ 4-way
66 and r0, r0, #0xf
67 moveq r3, #0 @ 1-way
68 mov r2, r2, lsl r0 @ actual cache size
69 movne r2, r2, lsr #2 @ turned into # of sets
70 sub r2, r2, #(1 << 5)
71 stmia r1, {r2, r3}
52 mov pc, lr 72 mov pc, lr
53 73
54/* 74/*
@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
117 */ 137 */
118ENTRY(feroceon_flush_kern_cache_all) 138ENTRY(feroceon_flush_kern_cache_all)
119 mov r2, #VM_EXEC 139 mov r2, #VM_EXEC
120 mov ip, #0 140
121__flush_whole_cache: 141__flush_whole_cache:
1221: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate 142 ldr r1, __cache_params
123 bne 1b 143 ldmia r1, {r1, r3}
1441: orr ip, r1, r3
1452: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
146 subs ip, ip, #(1 << 30) @ next way
147 bcs 2b
148 subs r1, r1, #(1 << 5) @ next set
149 bcs 1b
150
124 tst r2, #VM_EXEC 151 tst r2, #VM_EXEC
152 mov ip, #0
125 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache 153 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
126 mcrne p15, 0, ip, c7, c10, 4 @ drain WB 154 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
127 mov pc, lr 155 mov pc, lr
@@ -138,7 +166,6 @@ __flush_whole_cache:
138 */ 166 */
139 .align 5 167 .align 5
140ENTRY(feroceon_flush_user_cache_range) 168ENTRY(feroceon_flush_user_cache_range)
141 mov ip, #0
142 sub r3, r1, r0 @ calculate total size 169 sub r3, r1, r0 @ calculate total size
143 cmp r3, #CACHE_DLIMIT 170 cmp r3, #CACHE_DLIMIT
144 bgt __flush_whole_cache 171 bgt __flush_whole_cache
@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
152 cmp r0, r1 179 cmp r0, r1
153 blo 1b 180 blo 1b
154 tst r2, #VM_EXEC 181 tst r2, #VM_EXEC
182 mov ip, #0
155 mcrne p15, 0, ip, c7, c10, 4 @ drain WB 183 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
156 mov pc, lr 184 mov pc, lr
157 185
@@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area)
306 .align 5 334 .align 5
307ENTRY(cpu_feroceon_switch_mm) 335ENTRY(cpu_feroceon_switch_mm)
308#ifdef CONFIG_MMU 336#ifdef CONFIG_MMU
309 mov ip, #0 337 /*
310@ && 'Clean & Invalidate whole DCache' 338 * Note: we wish to call __flush_whole_cache but we need to preserve
3111: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate 339 * lr to do so. The only way without touching main memory is to
312 bne 1b 340 * use r2 which is normally used to test the VM_EXEC flag, and
313 mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache 341 * compensate locally for the skipped ops if it is not set.
314 mcr p15, 0, ip, c7, c10, 4 @ drain WB 342 */
343 mov r2, lr @ abuse r2 to preserve lr
344 bl __flush_whole_cache
345 @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
346 tst r2, #VM_EXEC
347 mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
348 mcreq p15, 0, ip, c7, c10, 4 @ drain WB
349
315 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer 350 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
316 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs 351 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
317#endif 352 mov pc, r2
353#else
318 mov pc, lr 354 mov pc, lr
355#endif
319 356
320/* 357/*
321 * cpu_feroceon_set_pte_ext(ptep, pte, ext) 358 * cpu_feroceon_set_pte_ext(ptep, pte, ext)