aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Will Deacon <will.deacon@arm.com> 2013-02-11 08:47:48 -0500
committer Will Deacon <will.deacon@arm.com> 2013-08-12 07:25:44 -0400
commit f0915781bd5edf78b1154e61efe962dc15872d09 (patch)
tree a8dc2c7b3c4f0f437171a80c6bde06b020418dc3
parent 792a843a9f353d3e2474b6f5057b7eaecba41675 (diff)
ARM: tlb: don't perform inner-shareable invalidation for local TLB ops
Inner-shareable TLB invalidation is typically more expensive than local (non-shareable) invalidation, so performing the broadcasting for local_flush_tlb_* operations is a waste of cycles and needlessly clobbers entries in the TLBs of other CPUs.

This patch introduces __flush_tlb_* versions for many of the TLB invalidation functions, which only respect inner-shareable variants of the invalidation instructions when presented with the TLB_V7_UIS_{PAGE,FULL,ASID} flags. The local version is also inlined to prevent SMP_ON_UP kernels from missing flushes, where the __flush variant would be called with the UP flags.

This gains us around 0.5% in hackbench scores for a dual-core A15, but I would expect this to improve as more cores (and clusters) are added to the equation.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Albin Tonnerre <Albin.Tonnerre@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
-rw-r--r-- arch/arm/include/asm/tlbflush.h 138
-rw-r--r-- arch/arm/kernel/smp_tlb.c 8
-rw-r--r-- arch/arm/mm/context.c 7
3 files changed, 123 insertions, 30 deletions
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index f467e9b3f8d5..3316264916e9 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -319,6 +319,16 @@ extern struct cpu_tlb_fns cpu_tlb;
319#define tlb_op(f, regs, arg) __tlb_op(f, "p15, 0, %0, " regs, arg) 319#define tlb_op(f, regs, arg) __tlb_op(f, "p15, 0, %0, " regs, arg)
320#define tlb_l2_op(f, regs, arg) __tlb_op(f, "p15, 1, %0, " regs, arg) 320#define tlb_l2_op(f, regs, arg) __tlb_op(f, "p15, 1, %0, " regs, arg)
321 321
322static inline void __local_flush_tlb_all(void)
323{
324 const int zero = 0;
325 const unsigned int __tlb_flag = __cpu_tlb_flags;
326
327 tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
328 tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
329 tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
330}
331
322static inline void local_flush_tlb_all(void) 332static inline void local_flush_tlb_all(void)
323{ 333{
324 const int zero = 0; 334 const int zero = 0;
@@ -327,10 +337,8 @@ static inline void local_flush_tlb_all(void)
327 if (tlb_flag(TLB_WB)) 337 if (tlb_flag(TLB_WB))
328 dsb(); 338 dsb();
329 339
330 tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); 340 __local_flush_tlb_all();
331 tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); 341 tlb_op(TLB_V7_UIS_FULL, "c8, c7, 0", zero);
332 tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
333 tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
334 342
335 if (tlb_flag(TLB_BARRIER)) { 343 if (tlb_flag(TLB_BARRIER)) {
336 dsb(); 344 dsb();
@@ -338,31 +346,69 @@ static inline void local_flush_tlb_all(void)
338 } 346 }
339} 347}
340 348
341static inline void local_flush_tlb_mm(struct mm_struct *mm) 349static inline void __flush_tlb_all(void)
342{ 350{
343 const int zero = 0; 351 const int zero = 0;
344 const int asid = ASID(mm);
345 const unsigned int __tlb_flag = __cpu_tlb_flags; 352 const unsigned int __tlb_flag = __cpu_tlb_flags;
346 353
347 if (tlb_flag(TLB_WB)) 354 if (tlb_flag(TLB_WB))
348 dsb(); 355 dsb();
349 356
357 __local_flush_tlb_all();
358 tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
359
360 if (tlb_flag(TLB_BARRIER)) {
361 dsb();
362 isb();
363 }
364}
365
366static inline void __local_flush_tlb_mm(struct mm_struct *mm)
367{
368 const int zero = 0;
369 const int asid = ASID(mm);
370 const unsigned int __tlb_flag = __cpu_tlb_flags;
371
350 if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { 372 if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
351 if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { 373 if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
352 tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); 374 tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero);
353 tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); 375 tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero);
354 tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); 376 tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero);
355 } 377 }
356 put_cpu();
357 } 378 }
358 379
359 tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid); 380 tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid);
360 tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid); 381 tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid);
361 tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid); 382 tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid);
383}
384
385static inline void local_flush_tlb_mm(struct mm_struct *mm)
386{
387 const int asid = ASID(mm);
388 const unsigned int __tlb_flag = __cpu_tlb_flags;
389
390 if (tlb_flag(TLB_WB))
391 dsb();
392
393 __local_flush_tlb_mm(mm);
394 tlb_op(TLB_V7_UIS_ASID, "c8, c7, 2", asid);
395
396 if (tlb_flag(TLB_BARRIER))
397 dsb();
398}
399
400static inline void __flush_tlb_mm(struct mm_struct *mm)
401{
402 const unsigned int __tlb_flag = __cpu_tlb_flags;
403
404 if (tlb_flag(TLB_WB))
405 dsb();
406
407 __local_flush_tlb_mm(mm);
362#ifdef CONFIG_ARM_ERRATA_720789 408#ifdef CONFIG_ARM_ERRATA_720789
363 tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero); 409 tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", 0);
364#else 410#else
365 tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid); 411 tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", ASID(mm));
366#endif 412#endif
367 413
368 if (tlb_flag(TLB_BARRIER)) 414 if (tlb_flag(TLB_BARRIER))
@@ -370,16 +416,13 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
370} 416}
371 417
372static inline void 418static inline void
373local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) 419__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
374{ 420{
375 const int zero = 0; 421 const int zero = 0;
376 const unsigned int __tlb_flag = __cpu_tlb_flags; 422 const unsigned int __tlb_flag = __cpu_tlb_flags;
377 423
378 uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); 424 uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
379 425
380 if (tlb_flag(TLB_WB))
381 dsb();
382
383 if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && 426 if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
384 cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { 427 cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
385 tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); 428 tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr);
@@ -392,6 +435,36 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
392 tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr); 435 tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr);
393 tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr); 436 tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr);
394 tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr); 437 tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr);
438}
439
440static inline void
441local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
442{
443 const unsigned int __tlb_flag = __cpu_tlb_flags;
444
445 uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
446
447 if (tlb_flag(TLB_WB))
448 dsb();
449
450 __local_flush_tlb_page(vma, uaddr);
451 tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", uaddr);
452
453 if (tlb_flag(TLB_BARRIER))
454 dsb();
455}
456
457static inline void
458__flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
459{
460 const unsigned int __tlb_flag = __cpu_tlb_flags;
461
462 uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
463
464 if (tlb_flag(TLB_WB))
465 dsb();
466
467 __local_flush_tlb_page(vma, uaddr);
395#ifdef CONFIG_ARM_ERRATA_720789 468#ifdef CONFIG_ARM_ERRATA_720789
396 tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK); 469 tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
397#else 470#else
@@ -402,16 +475,11 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
402 dsb(); 475 dsb();
403} 476}
404 477
405static inline void local_flush_tlb_kernel_page(unsigned long kaddr) 478static inline void __local_flush_tlb_kernel_page(unsigned long kaddr)
406{ 479{
407 const int zero = 0; 480 const int zero = 0;
408 const unsigned int __tlb_flag = __cpu_tlb_flags; 481 const unsigned int __tlb_flag = __cpu_tlb_flags;
409 482
410 kaddr &= PAGE_MASK;
411
412 if (tlb_flag(TLB_WB))
413 dsb();
414
415 tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); 483 tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr);
416 tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); 484 tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr);
417 tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); 485 tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr);
@@ -421,6 +489,36 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
421 tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr); 489 tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr);
422 tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr); 490 tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr);
423 tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr); 491 tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr);
492}
493
494static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
495{
496 const unsigned int __tlb_flag = __cpu_tlb_flags;
497
498 kaddr &= PAGE_MASK;
499
500 if (tlb_flag(TLB_WB))
501 dsb();
502
503 __local_flush_tlb_kernel_page(kaddr);
504 tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", kaddr);
505
506 if (tlb_flag(TLB_BARRIER)) {
507 dsb();
508 isb();
509 }
510}
511
512static inline void __flush_tlb_kernel_page(unsigned long kaddr)
513{
514 const unsigned int __tlb_flag = __cpu_tlb_flags;
515
516 kaddr &= PAGE_MASK;
517
518 if (tlb_flag(TLB_WB))
519 dsb();
520
521 __local_flush_tlb_kernel_page(kaddr);
424 tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr); 522 tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
425 523
426 if (tlb_flag(TLB_BARRIER)) { 524 if (tlb_flag(TLB_BARRIER)) {
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index c2edfff573c2..5883b8ae77c8 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -104,7 +104,7 @@ void flush_tlb_all(void)
104 if (tlb_ops_need_broadcast()) 104 if (tlb_ops_need_broadcast())
105 on_each_cpu(ipi_flush_tlb_all, NULL, 1); 105 on_each_cpu(ipi_flush_tlb_all, NULL, 1);
106 else 106 else
107 local_flush_tlb_all(); 107 __flush_tlb_all();
108 broadcast_tlb_a15_erratum(); 108 broadcast_tlb_a15_erratum();
109} 109}
110 110
@@ -113,7 +113,7 @@ void flush_tlb_mm(struct mm_struct *mm)
113 if (tlb_ops_need_broadcast()) 113 if (tlb_ops_need_broadcast())
114 on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); 114 on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
115 else 115 else
116 local_flush_tlb_mm(mm); 116 __flush_tlb_mm(mm);
117 broadcast_tlb_mm_a15_erratum(mm); 117 broadcast_tlb_mm_a15_erratum(mm);
118} 118}
119 119
@@ -126,7 +126,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
126 on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, 126 on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
127 &ta, 1); 127 &ta, 1);
128 } else 128 } else
129 local_flush_tlb_page(vma, uaddr); 129 __flush_tlb_page(vma, uaddr);
130 broadcast_tlb_mm_a15_erratum(vma->vm_mm); 130 broadcast_tlb_mm_a15_erratum(vma->vm_mm);
131} 131}
132 132
@@ -137,7 +137,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
137 ta.ta_start = kaddr; 137 ta.ta_start = kaddr;
138 on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); 138 on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
139 } else 139 } else
140 local_flush_tlb_kernel_page(kaddr); 140 __flush_tlb_kernel_page(kaddr);
141 broadcast_tlb_a15_erratum(); 141 broadcast_tlb_a15_erratum();
142} 142}
143 143
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 4a0544492f10..84e6f772e204 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -162,10 +162,7 @@ static void flush_context(unsigned int cpu)
162 } 162 }
163 163
164 /* Queue a TLB invalidate and flush the I-cache if necessary. */ 164 /* Queue a TLB invalidate and flush the I-cache if necessary. */
165 if (!tlb_ops_need_broadcast()) 165 cpumask_setall(&tlb_flush_pending);
166 cpumask_set_cpu(cpu, &tlb_flush_pending);
167 else
168 cpumask_setall(&tlb_flush_pending);
169 166
170 if (icache_is_vivt_asid_tagged()) 167 if (icache_is_vivt_asid_tagged())
171 __flush_icache_all(); 168 __flush_icache_all();
@@ -245,8 +242,6 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
245 if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { 242 if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
246 local_flush_bp_all(); 243 local_flush_bp_all();
247 local_flush_tlb_all(); 244 local_flush_tlb_all();
248 if (erratum_a15_798181())
249 dummy_flush_tlb_a15_erratum();
250 } 245 }
251 246
252 atomic64_set(&per_cpu(active_asids, cpu), asid); 247 atomic64_set(&per_cpu(active_asids, cpu), asid);