diff options
author | Nicolas Pitre <nico@cam.org> | 2008-04-23 20:04:54 -0400 |
---|---|---|
committer | Lennert Buytenhek <buytenh@marvell.com> | 2008-06-22 16:44:58 -0400 |
commit | 6c386e58aadb90fb5d8b5be979e02d74f8be52fe (patch) | |
tree | 89ddc09277ad4191aa5bfd12db20302544ec2294 /arch/arm/mm | |
parent | 79e90dd5aa95adfdc3117db8a559da3d0195ba58 (diff) |
[ARM] Feroceon: speed up flushing of the entire cache
Flushing the L1 D cache with a test/clean/invalidate loop is very
easy in software, but it is not the quickest way of doing it, as
there is a lot of overhead involved in re-scanning the cache from
the beginning every time we hit a dirty line.
This patch makes proc-feroceon.S use "clean+invalidate by set/way"
loops that match the possible cache configurations of Feroceon CPUs
(either direct-mapped or 4-way set associative).
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/proc-feroceon.S | 59 |
1 file changed, 48 insertions, 11 deletions
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 12b46d7b7f5e..00eadb5995c7 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S | |||
@@ -44,11 +44,31 @@ | |||
44 | */ | 44 | */ |
45 | #define CACHE_DLINESIZE 32 | 45 | #define CACHE_DLINESIZE 32 |
46 | 46 | ||
47 | .bss | ||
48 | .align 3 | ||
49 | __cache_params_loc: | ||
50 | .space 8 | ||
51 | |||
47 | .text | 52 | .text |
53 | __cache_params: | ||
54 | .word __cache_params_loc | ||
55 | |||
48 | /* | 56 | /* |
49 | * cpu_feroceon_proc_init() | 57 | * cpu_feroceon_proc_init() |
50 | */ | 58 | */ |
51 | ENTRY(cpu_feroceon_proc_init) | 59 | ENTRY(cpu_feroceon_proc_init) |
60 | mrc p15, 0, r0, c0, c0, 1 @ read cache type register | ||
61 | ldr r1, __cache_params | ||
62 | mov r2, #(16 << 5) | ||
63 | tst r0, #(1 << 16) @ get way | ||
64 | mov r0, r0, lsr #18 @ get cache size order | ||
65 | movne r3, #((4 - 1) << 30) @ 4-way | ||
66 | and r0, r0, #0xf | ||
67 | moveq r3, #0 @ 1-way | ||
68 | mov r2, r2, lsl r0 @ actual cache size | ||
69 | movne r2, r2, lsr #2 @ turned into # of sets | ||
70 | sub r2, r2, #(1 << 5) | ||
71 | stmia r1, {r2, r3} | ||
52 | mov pc, lr | 72 | mov pc, lr |
53 | 73 | ||
54 | /* | 74 | /* |
@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all) | |||
117 | */ | 137 | */ |
118 | ENTRY(feroceon_flush_kern_cache_all) | 138 | ENTRY(feroceon_flush_kern_cache_all) |
119 | mov r2, #VM_EXEC | 139 | mov r2, #VM_EXEC |
120 | mov ip, #0 | 140 | |
121 | __flush_whole_cache: | 141 | __flush_whole_cache: |
122 | 1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate | 142 | ldr r1, __cache_params |
123 | bne 1b | 143 | ldmia r1, {r1, r3} |
144 | 1: orr ip, r1, r3 | ||
145 | 2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way | ||
146 | subs ip, ip, #(1 << 30) @ next way | ||
147 | bcs 2b | ||
148 | subs r1, r1, #(1 << 5) @ next set | ||
149 | bcs 1b | ||
150 | |||
124 | tst r2, #VM_EXEC | 151 | tst r2, #VM_EXEC |
152 | mov ip, #0 | ||
125 | mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache | 153 | mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache |
126 | mcrne p15, 0, ip, c7, c10, 4 @ drain WB | 154 | mcrne p15, 0, ip, c7, c10, 4 @ drain WB |
127 | mov pc, lr | 155 | mov pc, lr |
@@ -138,7 +166,6 @@ __flush_whole_cache: | |||
138 | */ | 166 | */ |
139 | .align 5 | 167 | .align 5 |
140 | ENTRY(feroceon_flush_user_cache_range) | 168 | ENTRY(feroceon_flush_user_cache_range) |
141 | mov ip, #0 | ||
142 | sub r3, r1, r0 @ calculate total size | 169 | sub r3, r1, r0 @ calculate total size |
143 | cmp r3, #CACHE_DLIMIT | 170 | cmp r3, #CACHE_DLIMIT |
144 | bgt __flush_whole_cache | 171 | bgt __flush_whole_cache |
@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range) | |||
152 | cmp r0, r1 | 179 | cmp r0, r1 |
153 | blo 1b | 180 | blo 1b |
154 | tst r2, #VM_EXEC | 181 | tst r2, #VM_EXEC |
182 | mov ip, #0 | ||
155 | mcrne p15, 0, ip, c7, c10, 4 @ drain WB | 183 | mcrne p15, 0, ip, c7, c10, 4 @ drain WB |
156 | mov pc, lr | 184 | mov pc, lr |
157 | 185 | ||
@@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area) | |||
306 | .align 5 | 334 | .align 5 |
307 | ENTRY(cpu_feroceon_switch_mm) | 335 | ENTRY(cpu_feroceon_switch_mm) |
308 | #ifdef CONFIG_MMU | 336 | #ifdef CONFIG_MMU |
309 | mov ip, #0 | 337 | /* |
310 | @ && 'Clean & Invalidate whole DCache' | 338 | * Note: we wish to call __flush_whole_cache but we need to preserve |
311 | 1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate | 339 | * lr to do so. The only way without touching main memory is to |
312 | bne 1b | 340 | * use r2 which is normally used to test the VM_EXEC flag, and |
313 | mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache | 341 | * compensate locally for the skipped ops if it is not set. |
314 | mcr p15, 0, ip, c7, c10, 4 @ drain WB | 342 | */ |
343 | mov r2, lr @ abuse r2 to preserve lr | ||
344 | bl __flush_whole_cache | ||
345 | @ if r2 contains the VM_EXEC bit then the next 2 ops are done already | ||
346 | tst r2, #VM_EXEC | ||
347 | mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache | ||
348 | mcreq p15, 0, ip, c7, c10, 4 @ drain WB | ||
349 | |||
315 | mcr p15, 0, r0, c2, c0, 0 @ load page table pointer | 350 | mcr p15, 0, r0, c2, c0, 0 @ load page table pointer |
316 | mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs | 351 | mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs |
317 | #endif | 352 | mov pc, r2 |
353 | #else | ||
318 | mov pc, lr | 354 | mov pc, lr |
355 | #endif | ||
319 | 356 | ||
320 | /* | 357 | /* |
321 | * cpu_feroceon_set_pte_ext(ptep, pte, ext) | 358 | * cpu_feroceon_set_pte_ext(ptep, pte, ext) |