path: root/arch/sh/mm
author		Paul Mundt <lethal@linux-sh.org>	2009-09-09 01:04:06 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2009-09-09 01:04:06 -0400
commit		654d364e26c797e8a5f9e2a1393607e6ca0106eb (patch)
tree		69af9234533e6972404434d694272200c13477b8 /arch/sh/mm
parent		682f88ab74e55dae55ea3bf30b46f56f71b793bd (diff)
sh: sh4_flush_cache_mm() optimizations.
The i-cache flush in the case of VM_EXEC was added way back when as a
sanity measure, and in practice we only care about evicting aliases
from the d-cache. As a result, it's possible to drop the i-cache flush
completely here.

After careful profiling it's also come up that all of the work
associated with hunting down aliases and doing ranged flushing ends up
generating more overhead than simply blasting away the entire dcache,
particularly if there are many mm's that need to be iterated over. As a
result of that, just move back to flush_dcache_all() in these cases,
which restores the old behaviour, and vastly simplifies the path.

Additionally, on platforms without aliases at all, this can simply be
nopped out. Presently we have the alias check in the SH-4 specific
version, but this is true for all of the platforms, so move the check
up to a generic location. This cuts down quite a bit on superfluous
cacheop IPIs.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
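As a quick orientation before the diff, here is a condensed sketch of the resulting call path, pieced together from the hunks below rather than copied verbatim from either file: the generic flush_cache_mm() now bails out early on parts with no d-cache aliases, and the SH-4 local handler simply flushes the whole d-cache.

/*
 * Condensed sketch of the post-patch behaviour, assembled from the
 * arch/sh/mm/cache.c and arch/sh/mm/cache-sh4.c hunks below.
 */
void flush_cache_mm(struct mm_struct *mm)
{
	/* No aliases means nothing to evict: skip the cacheop IPI entirely. */
	if (boot_cpu_data.dcache.n_aliases == 0)
		return;

	cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
}

static void sh4_flush_cache_mm(void *arg)
{
	struct mm_struct *mm = arg;

	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
		return;

	/* Ranged alias hunting is gone; just flush the whole d-cache. */
	flush_dcache_all();
}

On parts that report no d-cache aliases, the early return means local_flush_cache_mm() is never invoked on any CPU, which is where the saved cacheop IPIs come from.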
Diffstat (limited to 'arch/sh/mm')
-rw-r--r--	arch/sh/mm/cache-sh4.c	124
-rw-r--r--	arch/sh/mm/cache.c	6
2 files changed, 10 insertions(+), 120 deletions(-)
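For context on the cost being removed: the __flush_cache_mm() helper deleted in the first hunk below walked the page tables and recorded, per page, which cache colours could alias. A small standalone sketch of that alias test follows; the PAGE_SHIFT, ALIAS_MASK, and N_ALIASES values are hypothetical stand-ins for what boot_cpu_data.dcache provides on real SH-4 parts.

#include <stdio.h>

/* Hypothetical geometry: 4 KiB pages, four possible cache colours. */
#define PAGE_SHIFT	12
#define ALIAS_MASK	0x3000UL	/* index bits above the page offset */
#define N_ALIASES	4

int main(void)
{
	unsigned long p    = 0x00402000UL;	/* example virtual page address */
	unsigned long phys = 0x08801000UL;	/* example physical page address */
	unsigned long all_aliases_mask = (1UL << N_ALIASES) - 1;
	unsigned long d = 0;

	/*
	 * A mapping can alias only if the virtual and physical addresses
	 * select different colours, i.e. they differ within ALIAS_MASK.
	 * The removed helper accumulated one bit per colour to flush.
	 */
	if ((p ^ phys) & ALIAS_MASK) {
		d |= 1UL << ((p & ALIAS_MASK) >> PAGE_SHIFT);
		d |= 1UL << ((phys & ALIAS_MASK) >> PAGE_SHIFT);
	}

	printf("colours to flush: 0x%lx (all colours would be 0x%lx)\n",
	       d, all_aliases_mask);
	return 0;
}

Doing this for every PTE across every VMA is the "hunting down aliases" the commit message refers to; the patch replaces it with a single flush_dcache_all().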
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index b2453bbef4cd..a5c339bca8aa 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -170,89 +170,13 @@ static void sh4_flush_cache_all(void *unused)
 	flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-			     unsigned long end)
-{
-	unsigned long d = 0, p = start & PAGE_MASK;
-	unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-	unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-	unsigned long select_bit;
-	unsigned long all_aliases_mask;
-	unsigned long addr_offset;
-	pgd_t *dir;
-	pmd_t *pmd;
-	pud_t *pud;
-	pte_t *pte;
-	int i;
-
-	dir = pgd_offset(mm, p);
-	pud = pud_offset(dir, p);
-	pmd = pmd_offset(pud, p);
-	end = PAGE_ALIGN(end);
-
-	all_aliases_mask = (1 << n_aliases) - 1;
-
-	do {
-		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-			p &= PMD_MASK;
-			p += PMD_SIZE;
-			pmd++;
-
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, p);
-
-		do {
-			unsigned long phys;
-			pte_t entry = *pte;
-
-			if (!(pte_val(entry) & _PAGE_PRESENT)) {
-				pte++;
-				p += PAGE_SIZE;
-				continue;
-			}
-
-			phys = pte_val(entry) & PTE_PHYS_MASK;
-
-			if ((p ^ phys) & alias_mask) {
-				d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-				d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-				if (d == all_aliases_mask)
-					goto loop_exit;
-			}
-
-			pte++;
-			p += PAGE_SIZE;
-		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-		pmd++;
-	} while (p < end);
-
-loop_exit:
-	addr_offset = 0;
-	select_bit = 1;
-
-	for (i = 0; i < n_aliases; i++) {
-		if (d & select_bit) {
-			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-			wmb();
-		}
-
-		select_bit <<= 1;
-		addr_offset += PAGE_SIZE;
-	}
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag). It's no different here. So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that. We
- * should try that.
+ * address (i.e. tag). It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -263,33 +187,7 @@ static void sh4_flush_cache_mm(void *arg)
 	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
 		return;
 
-	/*
-	 * If cache is only 4k-per-way, there are never any 'aliases'. Since
-	 * the cache is physically tagged, the data can just be left in there.
-	 */
-	if (boot_cpu_data.dcache.n_aliases == 0)
-		return;
-
-	/*
-	 * Don't bother groveling around the dcache for the VMA ranges
-	 * if there are too many PTEs to make it worthwhile.
-	 */
-	if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else {
-		struct vm_area_struct *vma;
-
-		/*
-		 * In this case there are reasonably sized ranges to flush,
-		 * iterate through the VMA list and take care of any aliases.
-		 */
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-	}
-
-	/* Only touch the icache if one of the VMAs has VM_EXEC set. */
-	if (mm->exec_vm)
-		flush_icache_all();
+	flush_dcache_all();
 }
 
 /*
@@ -372,24 +270,10 @@ static void sh4_flush_cache_range(void *args)
 	if (boot_cpu_data.dcache.n_aliases == 0)
 		return;
 
-	/*
-	 * Don't bother with the lookup and alias check if we have a
-	 * wide range to cover, just blow away the dcache in its
-	 * entirety instead. -- PFM.
-	 */
-	if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else
-		__flush_cache_mm(vma->vm_mm, start, end);
+	flush_dcache_all();
 
-	if (vma->vm_flags & VM_EXEC) {
-		/*
-		 * TODO: Is this required??? Need to look at how I-cache
-		 * coherency is assured when new programs are loaded to see if
-		 * this matters.
-		 */
+	if (vma->vm_flags & VM_EXEC)
 		flush_icache_all();
-	}
 }
 
 /**
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 35c37b7f717a..4aa926054531 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -164,11 +164,17 @@ void flush_cache_all(void)
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
 